1 /*
2 * AArch64 translation
3 *
4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20 #include "exec/target_page.h"
21 #include "translate.h"
22 #include "translate-a64.h"
23 #include "qemu/log.h"
24 #include "arm_ldst.h"
25 #include "semihosting/semihost.h"
26 #include "cpregs.h"
27
28 static TCGv_i64 cpu_X[32];
29 static TCGv_i64 cpu_pc;
30
31 /* Load/store exclusive handling */
32 static TCGv_i64 cpu_exclusive_high;
33
34 static const char *regnames[] = {
35 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
36 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
37 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
38 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
39 };
40
41 enum a64_shift_type {
42 A64_SHIFT_TYPE_LSL = 0,
43 A64_SHIFT_TYPE_LSR = 1,
44 A64_SHIFT_TYPE_ASR = 2,
45 A64_SHIFT_TYPE_ROR = 3
46 };
47
48 /*
49 * Helpers for extracting complex instruction fields
50 */
51
52 /*
53 * For load/store with an unsigned 12 bit immediate scaled by the element
54 * size. The input has the immediate field in bits [14:3] and the element
55 * size in [2:0].
56 */
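/* For example, x = 0x2b (imm field 5, size field 3) yields 5 << 3 = 40. */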
57 static int uimm_scaled(DisasContext *s, int x)
58 {
59 unsigned imm = x >> 3;
60 unsigned scale = extract32(x, 0, 3);
61 return imm << scale;
62 }
63
64 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
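/* (The MTE tag granule is 16 bytes, so this multiplies the offset by 16.) */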
65 static int scale_by_log2_tag_granule(DisasContext *s, int x)
66 {
67 return x << LOG2_TAG_GRANULE;
68 }
69
70 /*
71 * Include the generated decoders.
72 */
73
74 #include "decode-sme-fa64.c.inc"
75 #include "decode-a64.c.inc"
76
77 /* initialize TCG globals. */
78 void a64_translate_init(void)
79 {
80 int i;
81
82 cpu_pc = tcg_global_mem_new_i64(tcg_env,
83 offsetof(CPUARMState, pc),
84 "pc");
85 for (i = 0; i < 32; i++) {
86 cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
87 offsetof(CPUARMState, xregs[i]),
88 regnames[i]);
89 }
90
91 cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
92 offsetof(CPUARMState, exclusive_high), "exclusive_high");
93 }
94
95 /*
96 * Return the core mmu_idx to use for A64 load/store insns which
97 * have an "unprivileged load/store" variant. Those insns access
98 * EL0 if executed from an EL which has control over EL0 (usually
99 * EL1) but behave like normal loads and stores if executed from
100 * elsewhere (eg EL3).
101 *
102 * @unpriv : true for the unprivileged encoding; false for the
103 * normal encoding (in which case we will return the same
104 * thing as get_mem_index()).
105 */
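/*
 * For example, an LDTR executed at EL1 in the EL1&0 regime uses
 * ARMMMUIdx_E10_0, so the load is performed with EL0 permissions.
 */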
106 static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
107 {
108 /*
109 * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
110 * which is the usual mmu_idx for this cpu state.
111 */
112 ARMMMUIdx useridx = s->mmu_idx;
113
114 if (unpriv && s->unpriv) {
115 /*
116 * We have pre-computed the condition for AccType_UNPRIV.
117 * Therefore we should never get here with a mmu_idx for
118 * which we do not know the corresponding user mmu_idx.
119 */
120 switch (useridx) {
121 case ARMMMUIdx_E10_1:
122 case ARMMMUIdx_E10_1_PAN:
123 useridx = ARMMMUIdx_E10_0;
124 break;
125 case ARMMMUIdx_E20_2:
126 case ARMMMUIdx_E20_2_PAN:
127 useridx = ARMMMUIdx_E20_0;
128 break;
129 default:
130 g_assert_not_reached();
131 }
132 }
133 return arm_to_core_mmu_idx(useridx);
134 }
135
136 static void set_btype_raw(int val)
137 {
138 tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
139 offsetof(CPUARMState, btype));
140 }
141
142 static void set_btype(DisasContext *s, int val)
143 {
144 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
145 tcg_debug_assert(val >= 1 && val <= 3);
146 set_btype_raw(val);
147 s->btype = -1;
148 }
149
150 static void reset_btype(DisasContext *s)
151 {
152 if (s->btype != 0) {
153 set_btype_raw(0);
154 s->btype = 0;
155 }
156 }
157
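/*
 * Compute pc_curr + diff into dest.  With CF_PCREL the absolute PC is
 * not known at translation time, so the displacement is applied to
 * cpu_pc (which at runtime holds the value saved at s->pc_save);
 * otherwise the absolute address can be emitted as a constant.
 */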
158 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
159 {
160 assert(s->pc_save != -1);
161 if (tb_cflags(s->base.tb) & CF_PCREL) {
162 tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
163 } else {
164 tcg_gen_movi_i64(dest, s->pc_curr + diff);
165 }
166 }
167
168 void gen_a64_update_pc(DisasContext *s, target_long diff)
169 {
170 gen_pc_plus_diff(s, cpu_pc, diff);
171 s->pc_save = s->pc_curr + diff;
172 }
173
174 /*
175 * Handle Top Byte Ignore (TBI) bits.
176 *
177 * If address tagging is enabled via the TCR TBI bits:
178 * + for EL2 and EL3 there is only one TBI bit, and if it is set
179 * then the address is zero-extended, clearing bits [63:56]
180 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
181 * and TBI1 controls addresses with bit 55 == 1.
182 * If the appropriate TBI bit is set for the address then
183 * the address is sign-extended from bit 55 into bits [63:56]
184 *
185 * Here we have concatenated TBI{1,0} into tbi.
186 */
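/*
 * For example, tbi == 1 (TBI0 set, TBI1 clear): an address with bit 55
 * clear has bits [63:56] replaced by zeroes, while an address with
 * bit 55 set is passed through unchanged, since only TBI1 would apply.
 */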
187 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
188 TCGv_i64 src, int tbi)
189 {
190 if (tbi == 0) {
191 /* Load unmodified address */
192 tcg_gen_mov_i64(dst, src);
193 } else if (!regime_has_2_ranges(s->mmu_idx)) {
194 /* Force tag byte to all zero */
195 tcg_gen_extract_i64(dst, src, 0, 56);
196 } else {
197 /* Sign-extend from bit 55. */
198 tcg_gen_sextract_i64(dst, src, 0, 56);
199
200 switch (tbi) {
201 case 1:
202 /* tbi0 but !tbi1: only use the extension if positive */
203 tcg_gen_and_i64(dst, dst, src);
204 break;
205 case 2:
206 /* !tbi0 but tbi1: only use the extension if negative */
207 tcg_gen_or_i64(dst, dst, src);
208 break;
209 case 3:
210 /* tbi0 and tbi1: always use the extension */
211 break;
212 default:
213 g_assert_not_reached();
214 }
215 }
216 }
217
218 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
219 {
220 /*
221 * If address tagging is enabled for instructions via the TCR TBI bits,
222 * then loading an address into the PC will clear out any tag.
223 */
224 gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
225 s->pc_save = -1;
226 }
227
228 /*
229 * Handle MTE and/or TBI.
230 *
231 * For TBI, ideally, we would do nothing. Proper behaviour on fault is
232 * for the tag to be present in the FAR_ELx register. But for user-only
233 * mode we do not have a TLB with which to implement this, so we must
234 * remove the top byte now.
235 *
236 * Always return a fresh temporary that we can increment independently
237 * of the write-back address.
238 */
239
240 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
241 {
242 TCGv_i64 clean = tcg_temp_new_i64();
243 #ifdef CONFIG_USER_ONLY
244 gen_top_byte_ignore(s, clean, addr, s->tbid);
245 #else
246 tcg_gen_mov_i64(clean, addr);
247 #endif
248 return clean;
249 }
250
251 /* Insert a zero tag into src, with the result at dst. */
252 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
253 {
254 tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
255 }
256
257 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
258 MMUAccessType acc, int log2_size)
259 {
260 gen_helper_probe_access(tcg_env, ptr,
261 tcg_constant_i32(acc),
262 tcg_constant_i32(get_mem_index(s)),
263 tcg_constant_i32(1 << log2_size));
264 }
265
266 /*
267 * For MTE, check a single logical or atomic access. This probes a single
268 * address, the exact one specified. The size and alignment of the access
269 * is not relevant to MTE, per se, but watchpoints do require the size,
270 * and we want to recognize those before making any other changes to state.
271 */
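/*
 * The descriptor passed to the mte_check helper below is packed with
 * FIELD_DP32: the mmu index, the TBI and TCMA bits, whether the access
 * is a write, its alignment requirement, and its size minus one.
 */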
272 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
273 bool is_write, bool tag_checked,
274 MemOp memop, bool is_unpriv,
275 int core_idx)
276 {
277 if (tag_checked && s->mte_active[is_unpriv]) {
278 TCGv_i64 ret;
279 int desc = 0;
280
281 desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
282 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
283 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
284 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
285 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
286 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);
287
288 ret = tcg_temp_new_i64();
289 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
290
291 return ret;
292 }
293 return clean_data_tbi(s, addr);
294 }
295
296 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
297 bool tag_checked, MemOp memop)
298 {
299 return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
300 false, get_mem_index(s));
301 }
302
303 /*
304 * For MTE, check multiple logical sequential accesses.
305 */
306 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
307 bool tag_checked, int total_size, MemOp single_mop)
308 {
309 if (tag_checked && s->mte_active[0]) {
310 TCGv_i64 ret;
311 int desc = 0;
312
313 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
314 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
315 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
316 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
317 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
318 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
319
320 ret = tcg_temp_new_i64();
321 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
322
323 return ret;
324 }
325 return clean_data_tbi(s, addr);
326 }
327
328 /*
329 * Generate the special alignment check that applies to AccType_ATOMIC
330 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
331 * naturally aligned, but it must not cross a 16-byte boundary.
332 * See AArch64.CheckAlignment().
333 */
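/*
 * The code below computes ((rn + imm) & 15) + access_size and branches
 * past the fault helper when the result is <= 16, i.e. when the access
 * lies entirely within a single 16-byte granule.
 */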
334 static void check_lse2_align(DisasContext *s, int rn, int imm,
335 bool is_write, MemOp mop)
336 {
337 TCGv_i32 tmp;
338 TCGv_i64 addr;
339 TCGLabel *over_label;
340 MMUAccessType type;
341 int mmu_idx;
342
343 tmp = tcg_temp_new_i32();
344 tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
345 tcg_gen_addi_i32(tmp, tmp, imm & 15);
346 tcg_gen_andi_i32(tmp, tmp, 15);
347 tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
348
349 over_label = gen_new_label();
350 tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
351
352 addr = tcg_temp_new_i64();
353 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
354
355 type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
356 mmu_idx = get_mem_index(s);
357 gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
358 tcg_constant_i32(mmu_idx));
359
360 gen_set_label(over_label);
362 }
363
364 /* Handle the alignment check for AccType_ATOMIC instructions. */
365 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
366 {
367 MemOp size = mop & MO_SIZE;
368
369 if (size == MO_8) {
370 return mop;
371 }
372
373 /*
374 * If size == MO_128, this is an LDXP, and the operation is single-copy
375 * atomic for each doubleword, not the entire quadword; it still must
376 * be quadword aligned.
377 */
378 if (size == MO_128) {
379 return finalize_memop_atom(s, MO_128 | MO_ALIGN,
380 MO_ATOM_IFALIGN_PAIR);
381 }
382 if (dc_isar_feature(aa64_lse2, s)) {
383 check_lse2_align(s, rn, 0, true, mop);
384 } else {
385 mop |= MO_ALIGN;
386 }
387 return finalize_memop(s, mop);
388 }
389
390 /* Handle the alignment check for AccType_ORDERED instructions. */
391 static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
392 bool is_write, MemOp mop)
393 {
394 MemOp size = mop & MO_SIZE;
395
396 if (size == MO_8) {
397 return mop;
398 }
399 if (size == MO_128) {
400 return finalize_memop_atom(s, MO_128 | MO_ALIGN,
401 MO_ATOM_IFALIGN_PAIR);
402 }
403 if (!dc_isar_feature(aa64_lse2, s)) {
404 mop |= MO_ALIGN;
405 } else if (!s->naa) {
406 check_lse2_align(s, rn, imm, is_write, mop);
407 }
408 return finalize_memop(s, mop);
409 }
410
411 typedef struct DisasCompare64 {
412 TCGCond cond;
413 TCGv_i64 value;
414 } DisasCompare64;
415
416 static void a64_test_cc(DisasCompare64 *c64, int cc)
417 {
418 DisasCompare c32;
419
420 arm_test_cc(&c32, cc);
421
422 /*
423 * Sign-extend the 32-bit value so that the GE/LT comparisons work
424 * properly. The NE/EQ comparisons are also fine with this choice.
425 */
426 c64->cond = c32.cond;
427 c64->value = tcg_temp_new_i64();
428 tcg_gen_ext_i32_i64(c64->value, c32.value);
429 }
430
431 static void gen_rebuild_hflags(DisasContext *s)
432 {
433 gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
434 }
435
436 static void gen_exception_internal(int excp)
437 {
438 assert(excp_is_internal(excp));
439 gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
440 }
441
442 static void gen_exception_internal_insn(DisasContext *s, int excp)
443 {
444 gen_a64_update_pc(s, 0);
445 gen_exception_internal(excp);
446 s->base.is_jmp = DISAS_NORETURN;
447 }
448
449 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
450 {
451 gen_a64_update_pc(s, 0);
452 gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
453 s->base.is_jmp = DISAS_NORETURN;
454 }
455
456 static void gen_step_complete_exception(DisasContext *s)
457 {
458 /* We just completed a step of an insn. Move from Active-not-pending
459 * to Active-pending, and then also take the swstep exception.
460 * This corresponds to making the (IMPDEF) choice to prioritize
461 * swstep exceptions over asynchronous exceptions taken to an exception
462 * level where debug is disabled. This choice has the advantage that
463 * we do not need to maintain internal state corresponding to the
464 * ISV/EX syndrome bits between completion of the step and generation
465 * of the exception, and our syndrome information is always correct.
466 */
467 gen_ss_advance(s);
468 gen_swstep_exception(s, 1, s->is_ldex);
469 s->base.is_jmp = DISAS_NORETURN;
470 }
471
472 static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
473 {
474 if (s->ss_active) {
475 return false;
476 }
477 return translator_use_goto_tb(&s->base, dest);
478 }
479
480 static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
481 {
482 if (use_goto_tb(s, s->pc_curr + diff)) {
483 /*
484 * For pcrel, the pc must always be up-to-date on entry to
485 * the linked TB, so that it can use simple additions for all
486 * further adjustments. For !pcrel, the linked TB is compiled
487 * to know its full virtual address, so we can delay the
488 * update to pc to the unlinked path. A long chain of links
489 * can thus avoid many updates to the PC.
490 */
491 if (tb_cflags(s->base.tb) & CF_PCREL) {
492 gen_a64_update_pc(s, diff);
493 tcg_gen_goto_tb(n);
494 } else {
495 tcg_gen_goto_tb(n);
496 gen_a64_update_pc(s, diff);
497 }
498 tcg_gen_exit_tb(s->base.tb, n);
499 s->base.is_jmp = DISAS_NORETURN;
500 } else {
501 gen_a64_update_pc(s, diff);
502 if (s->ss_active) {
503 gen_step_complete_exception(s);
504 } else {
505 tcg_gen_lookup_and_goto_ptr();
506 s->base.is_jmp = DISAS_NORETURN;
507 }
508 }
509 }
510
511 /*
512 * Register access functions
513 *
514 * These functions are used for directly accessing a register in cases where
515 * changes to the final register value are likely to be made. If you
516 * need to use a register for temporary calculation (e.g. index type
517 * operations) use the read_* form.
518 *
519 * B1.2.1 Register mappings
520 *
521 * In instruction register encoding 31 can refer to ZR (zero register) or
522 * the SP (stack pointer) depending on context. In QEMU's case we map SP
523 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
524 * This is the point of the _sp forms.
525 */
526 TCGv_i64 cpu_reg(DisasContext *s, int reg)
527 {
528 if (reg == 31) {
529 TCGv_i64 t = tcg_temp_new_i64();
530 tcg_gen_movi_i64(t, 0);
531 return t;
532 } else {
533 return cpu_X[reg];
534 }
535 }
536
537 /* register access for when 31 == SP */
538 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
539 {
540 return cpu_X[reg];
541 }
542
543 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
544 * representing the register contents. This TCGv is an auto-freed
545 * temporary so it need not be explicitly freed, and may be modified.
546 */
547 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
548 {
549 TCGv_i64 v = tcg_temp_new_i64();
550 if (reg != 31) {
551 if (sf) {
552 tcg_gen_mov_i64(v, cpu_X[reg]);
553 } else {
554 tcg_gen_ext32u_i64(v, cpu_X[reg]);
555 }
556 } else {
557 tcg_gen_movi_i64(v, 0);
558 }
559 return v;
560 }
561
562 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
563 {
564 TCGv_i64 v = tcg_temp_new_i64();
565 if (sf) {
566 tcg_gen_mov_i64(v, cpu_X[reg]);
567 } else {
568 tcg_gen_ext32u_i64(v, cpu_X[reg]);
569 }
570 return v;
571 }
572
573 /* Return the offset into CPUARMState of a slice (from
574 * the least significant end) of FP register Qn (ie
575 * Dn, Sn, Hn or Bn).
576 * (Note that this is not the same mapping as for A32; see cpu.h)
577 */
578 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
579 {
580 return vec_reg_offset(s, regno, 0, size);
581 }
582
583 /* Offset of the high half of the 128 bit vector Qn */
584 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
585 {
586 return vec_reg_offset(s, regno, 1, MO_64);
587 }
588
589 /* Convenience accessors for reading and writing single and double
590 * FP registers. Writing clears the upper parts of the associated
591 * 128 bit vector register, as required by the architecture.
592 * Note that unlike the GP register accessors, the values returned
593 * by the read functions must be manually freed.
594 */
595 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
596 {
597 TCGv_i64 v = tcg_temp_new_i64();
598
599 tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
600 return v;
601 }
602
603 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
604 {
605 TCGv_i32 v = tcg_temp_new_i32();
606
607 tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
608 return v;
609 }
610
611 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
612 {
613 TCGv_i32 v = tcg_temp_new_i32();
614
615 tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
616 return v;
617 }
618
619 static void clear_vec(DisasContext *s, int rd)
620 {
621 unsigned ofs = fp_reg_offset(s, rd, MO_64);
622 unsigned vsz = vec_full_reg_size(s);
623
624 tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0);
625 }
626
627 /*
628 * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
629 * If SVE is not enabled, then there are only 128 bits in the vector.
630 */
631 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
632 {
633 unsigned ofs = fp_reg_offset(s, rd, MO_64);
634 unsigned vsz = vec_full_reg_size(s);
635
636 /* Nop move, with side effect of clearing the tail. */
637 tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
638 }
639
640 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
641 {
642 unsigned ofs = fp_reg_offset(s, reg, MO_64);
643
644 tcg_gen_st_i64(v, tcg_env, ofs);
645 clear_vec_high(s, false, reg);
646 }
647
648 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
649 {
650 TCGv_i64 tmp = tcg_temp_new_i64();
651
652 tcg_gen_extu_i32_i64(tmp, v);
653 write_fp_dreg(s, reg, tmp);
654 }
655
656 /*
657 * Write a double result to 128 bit vector register reg, honouring FPCR.NEP:
658 * - if FPCR.NEP == 0, clear the high elements of reg
659 * - if FPCR.NEP == 1, set the high elements of reg from mergereg
660 * (i.e. merge the result with those high elements)
661 * In either case, SVE register bits above 128 are zeroed (per R_WKYLB).
662 */
663 static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg,
664 TCGv_i64 v)
665 {
666 if (!s->fpcr_nep) {
667 write_fp_dreg(s, reg, v);
668 return;
669 }
670
671 /*
672 * Move from mergereg to reg; this sets the high elements and
673 * clears the bits above 128 as a side effect.
674 */
675 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
676 vec_full_reg_offset(s, mergereg),
677 16, vec_full_reg_size(s));
678 tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg));
679 }
680
681 /*
682 * Write a single-prec result, but only clear the higher elements
683 * of the destination register if FPCR.NEP is 0; otherwise preserve them.
684 */
685 static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg,
686 TCGv_i32 v)
687 {
688 if (!s->fpcr_nep) {
689 write_fp_sreg(s, reg, v);
690 return;
691 }
692
693 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
694 vec_full_reg_offset(s, mergereg),
695 16, vec_full_reg_size(s));
696 tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
697 }
698
699 /*
700 * Write a half-prec result, but only clear the higher elements
701 * of the destination register if FPCR.NEP is 0; otherwise preserve them.
702 * The caller must ensure that the top 16 bits of v are zero.
703 */
704 static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg,
705 TCGv_i32 v)
706 {
707 if (!s->fpcr_nep) {
708 write_fp_sreg(s, reg, v);
709 return;
710 }
711
712 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
713 vec_full_reg_offset(s, mergereg),
714 16, vec_full_reg_size(s));
715 tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
716 }
717
718 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */
719 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
720 GVecGen2Fn *gvec_fn, int vece)
721 {
722 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
723 is_q ? 16 : 8, vec_full_reg_size(s));
724 }
725
726 /* Expand a 2-operand + immediate AdvSIMD vector operation using
727 * an expander function.
728 */
729 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
730 int64_t imm, GVecGen2iFn *gvec_fn, int vece)
731 {
732 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
733 imm, is_q ? 16 : 8, vec_full_reg_size(s));
734 }
735
736 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */
737 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
738 GVecGen3Fn *gvec_fn, int vece)
739 {
740 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
741 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
742 }
743
744 /* Expand a 4-operand AdvSIMD vector operation using an expander function. */
745 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
746 int rx, GVecGen4Fn *gvec_fn, int vece)
747 {
748 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
749 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
750 is_q ? 16 : 8, vec_full_reg_size(s));
751 }
752
753 /* Expand a 2-operand operation using an out-of-line helper. */
754 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
755 int rn, int data, gen_helper_gvec_2 *fn)
756 {
757 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
758 vec_full_reg_offset(s, rn),
759 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
760 }
761
762 /* Expand a 3-operand operation using an out-of-line helper. */
763 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
764 int rn, int rm, int data, gen_helper_gvec_3 *fn)
765 {
766 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
767 vec_full_reg_offset(s, rn),
768 vec_full_reg_offset(s, rm),
769 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
770 }
771
772 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
773 * an out-of-line helper.
774 */
775 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
776 int rm, ARMFPStatusFlavour fpsttype, int data,
777 gen_helper_gvec_3_ptr *fn)
778 {
779 TCGv_ptr fpst = fpstatus_ptr(fpsttype);
780 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
781 vec_full_reg_offset(s, rn),
782 vec_full_reg_offset(s, rm), fpst,
783 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
784 }
785
786 /* Expand a 4-operand operation using an out-of-line helper. */
787 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
788 int rm, int ra, int data, gen_helper_gvec_4 *fn)
789 {
790 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
791 vec_full_reg_offset(s, rn),
792 vec_full_reg_offset(s, rm),
793 vec_full_reg_offset(s, ra),
794 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
795 }
796
797 /*
798 * Expand a 4-operand operation using an out-of-line helper that takes
799 * a pointer to the CPU env.
800 */
801 static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
802 int rm, int ra, int data,
803 gen_helper_gvec_4_ptr *fn)
804 {
805 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
806 vec_full_reg_offset(s, rn),
807 vec_full_reg_offset(s, rm),
808 vec_full_reg_offset(s, ra),
809 tcg_env,
810 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
811 }
812
813 /*
814 * Expand a 4-operand + fpstatus pointer + simd data value operation using
815 * an out-of-line helper.
816 */
817 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
818 int rm, int ra, ARMFPStatusFlavour fpsttype,
819 int data,
820 gen_helper_gvec_4_ptr *fn)
821 {
822 TCGv_ptr fpst = fpstatus_ptr(fpsttype);
823 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
824 vec_full_reg_offset(s, rn),
825 vec_full_reg_offset(s, rm),
826 vec_full_reg_offset(s, ra), fpst,
827 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
828 }
829
830 /*
831 * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN.
832 * These functions implement
833 * d = floatN_is_any_nan(s) ? s : floatN_chs(s)
834 * which for float32 is
835 * d = (s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
836 * and similarly for the other float sizes.
837 */
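/*
 * The constants compared against below are the all-ones-exponent
 * (infinity) encodings for each format: 0x7c00 for float16, 0x7f800000
 * for float32 and 0x7ff0000000000000 for float64; with the sign bit
 * cleared, anything unsigned-greater than these is a NaN.
 */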
838 static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s)
839 {
840 TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
841
842 gen_vfp_negh(chs_s, s);
843 gen_vfp_absh(abs_s, s);
844 tcg_gen_movcond_i32(TCG_COND_GTU, d,
845 abs_s, tcg_constant_i32(0x7c00),
846 s, chs_s);
847 }
848
849 static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s)
850 {
851 TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
852
853 gen_vfp_negs(chs_s, s);
854 gen_vfp_abss(abs_s, s);
855 tcg_gen_movcond_i32(TCG_COND_GTU, d,
856 abs_s, tcg_constant_i32(0x7f800000UL),
857 s, chs_s);
858 }
859
860 static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
861 {
862 TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64();
863
864 gen_vfp_negd(chs_s, s);
865 gen_vfp_absd(abs_s, s);
866 tcg_gen_movcond_i64(TCG_COND_GTU, d,
867 abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
868 s, chs_s);
869 }
870
871 /*
872 * These functions implement
873 * d = floatN_is_any_nan(s) ? s : floatN_abs(s)
874 * which for float32 is
875 * d = (s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31))
876 * and similarly for the other float sizes.
877 */
878 static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s)
879 {
880 TCGv_i32 abs_s = tcg_temp_new_i32();
881
882 gen_vfp_absh(abs_s, s);
883 tcg_gen_movcond_i32(TCG_COND_GTU, d,
884 abs_s, tcg_constant_i32(0x7c00),
885 s, abs_s);
886 }
887
888 static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s)
889 {
890 TCGv_i32 abs_s = tcg_temp_new_i32();
891
892 gen_vfp_abss(abs_s, s);
893 tcg_gen_movcond_i32(TCG_COND_GTU, d,
894 abs_s, tcg_constant_i32(0x7f800000UL),
895 s, abs_s);
896 }
897
898 static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s)
899 {
900 TCGv_i64 abs_s = tcg_temp_new_i64();
901
902 gen_vfp_absd(abs_s, s);
903 tcg_gen_movcond_i64(TCG_COND_GTU, d,
904 abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
905 s, abs_s);
906 }
907
908 static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
909 {
910 if (dc->fpcr_ah) {
911 gen_vfp_ah_negh(d, s);
912 } else {
913 gen_vfp_negh(d, s);
914 }
915 }
916
917 static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
918 {
919 if (dc->fpcr_ah) {
920 gen_vfp_ah_negs(d, s);
921 } else {
922 gen_vfp_negs(d, s);
923 }
924 }
925
926 static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s)
927 {
928 if (dc->fpcr_ah) {
929 gen_vfp_ah_negd(d, s);
930 } else {
931 gen_vfp_negd(d, s);
932 }
933 }
934
935 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
936 * than the 32 bit equivalent.
937 */
938 static inline void gen_set_NZ64(TCGv_i64 result)
939 {
940 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
941 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
942 }
943
944 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
945 static inline void gen_logic_CC(int sf, TCGv_i64 result)
946 {
947 if (sf) {
948 gen_set_NZ64(result);
949 } else {
950 tcg_gen_extrl_i64_i32(cpu_ZF, result);
951 tcg_gen_mov_i32(cpu_NF, cpu_ZF);
952 }
953 tcg_gen_movi_i32(cpu_CF, 0);
954 tcg_gen_movi_i32(cpu_VF, 0);
955 }
956
957 /* dest = T0 + T1; compute C, N, V and Z flags */
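/*
 * Below, overflow is derived as V = sign bit of (result ^ t0) & ~(t0 ^ t1):
 * set only when both operands have the same sign and the sign of the
 * result differs from them.
 */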
958 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
959 {
960 TCGv_i64 result, flag, tmp;
961 result = tcg_temp_new_i64();
962 flag = tcg_temp_new_i64();
963 tmp = tcg_temp_new_i64();
964
965 tcg_gen_movi_i64(tmp, 0);
966 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
967
968 tcg_gen_extrl_i64_i32(cpu_CF, flag);
969
970 gen_set_NZ64(result);
971
972 tcg_gen_xor_i64(flag, result, t0);
973 tcg_gen_xor_i64(tmp, t0, t1);
974 tcg_gen_andc_i64(flag, flag, tmp);
975 tcg_gen_extrh_i64_i32(cpu_VF, flag);
976
977 tcg_gen_mov_i64(dest, result);
978 }
979
980 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
981 {
982 TCGv_i32 t0_32 = tcg_temp_new_i32();
983 TCGv_i32 t1_32 = tcg_temp_new_i32();
984 TCGv_i32 tmp = tcg_temp_new_i32();
985
986 tcg_gen_movi_i32(tmp, 0);
987 tcg_gen_extrl_i64_i32(t0_32, t0);
988 tcg_gen_extrl_i64_i32(t1_32, t1);
989 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
990 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
991 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
992 tcg_gen_xor_i32(tmp, t0_32, t1_32);
993 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
994 tcg_gen_extu_i32_i64(dest, cpu_NF);
995 }
996
997 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
998 {
999 if (sf) {
1000 gen_add64_CC(dest, t0, t1);
1001 } else {
1002 gen_add32_CC(dest, t0, t1);
1003 }
1004 }
1005
1006 /* dest = T0 - T1; compute C, N, V and Z flags */
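/*
 * For subtraction, C is the "no borrow" flag (t0 >= t1 unsigned) and
 * V = sign bit of (result ^ t0) & (t0 ^ t1): set when the operands have
 * differing signs and the sign of the result differs from t0.
 */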
1007 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1008 {
1009 /* 64 bit arithmetic */
1010 TCGv_i64 result, flag, tmp;
1011
1012 result = tcg_temp_new_i64();
1013 flag = tcg_temp_new_i64();
1014 tcg_gen_sub_i64(result, t0, t1);
1015
1016 gen_set_NZ64(result);
1017
1018 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
1019 tcg_gen_extrl_i64_i32(cpu_CF, flag);
1020
1021 tcg_gen_xor_i64(flag, result, t0);
1022 tmp = tcg_temp_new_i64();
1023 tcg_gen_xor_i64(tmp, t0, t1);
1024 tcg_gen_and_i64(flag, flag, tmp);
1025 tcg_gen_extrh_i64_i32(cpu_VF, flag);
1026 tcg_gen_mov_i64(dest, result);
1027 }
1028
1029 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1030 {
1031 /* 32 bit arithmetic */
1032 TCGv_i32 t0_32 = tcg_temp_new_i32();
1033 TCGv_i32 t1_32 = tcg_temp_new_i32();
1034 TCGv_i32 tmp;
1035
1036 tcg_gen_extrl_i64_i32(t0_32, t0);
1037 tcg_gen_extrl_i64_i32(t1_32, t1);
1038 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
1039 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1040 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
1041 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
1042 tmp = tcg_temp_new_i32();
1043 tcg_gen_xor_i32(tmp, t0_32, t1_32);
1044 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
1045 tcg_gen_extu_i32_i64(dest, cpu_NF);
1046 }
1047
1048 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1049 {
1050 if (sf) {
1051 gen_sub64_CC(dest, t0, t1);
1052 } else {
1053 gen_sub32_CC(dest, t0, t1);
1054 }
1055 }
1056
1057 /* dest = T0 + T1 + CF; do not compute flags. */
1058 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1059 {
1060 TCGv_i64 flag = tcg_temp_new_i64();
1061 tcg_gen_extu_i32_i64(flag, cpu_CF);
1062 tcg_gen_add_i64(dest, t0, t1);
1063 tcg_gen_add_i64(dest, dest, flag);
1064
1065 if (!sf) {
1066 tcg_gen_ext32u_i64(dest, dest);
1067 }
1068 }
1069
1070 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
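/*
 * tcg_gen_addcio_* computes dest = t0 + t1 + carry-in and produces the
 * carry-out in its second output, which becomes the new C flag; V is
 * then derived with the same xor/andc trick as the plain add above.
 */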
1071 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1072 {
1073 if (sf) {
1074 TCGv_i64 result = tcg_temp_new_i64();
1075 TCGv_i64 cf_64 = tcg_temp_new_i64();
1076 TCGv_i64 vf_64 = tcg_temp_new_i64();
1077 TCGv_i64 tmp = tcg_temp_new_i64();
1078
1079 tcg_gen_extu_i32_i64(cf_64, cpu_CF);
1080 tcg_gen_addcio_i64(result, cf_64, t0, t1, cf_64);
1081 tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
1082 gen_set_NZ64(result);
1083
1084 tcg_gen_xor_i64(vf_64, result, t0);
1085 tcg_gen_xor_i64(tmp, t0, t1);
1086 tcg_gen_andc_i64(vf_64, vf_64, tmp);
1087 tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
1088
1089 tcg_gen_mov_i64(dest, result);
1090 } else {
1091 TCGv_i32 t0_32 = tcg_temp_new_i32();
1092 TCGv_i32 t1_32 = tcg_temp_new_i32();
1093 TCGv_i32 tmp = tcg_temp_new_i32();
1094
1095 tcg_gen_extrl_i64_i32(t0_32, t0);
1096 tcg_gen_extrl_i64_i32(t1_32, t1);
1097 tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0_32, t1_32, cpu_CF);
1098
1099 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1100 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
1101 tcg_gen_xor_i32(tmp, t0_32, t1_32);
1102 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
1103 tcg_gen_extu_i32_i64(dest, cpu_NF);
1104 }
1105 }
1106
1107 /*
1108 * Load/Store generators
1109 */
1110
1111 /*
1112 * Store from GPR register to memory.
1113 */
1114 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
1115 TCGv_i64 tcg_addr, MemOp memop, int memidx,
1116 bool iss_valid,
1117 unsigned int iss_srt,
1118 bool iss_sf, bool iss_ar)
1119 {
1120 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
1121
1122 if (iss_valid) {
1123 uint32_t syn;
1124
1125 syn = syn_data_abort_with_iss(0,
1126 (memop & MO_SIZE),
1127 false,
1128 iss_srt,
1129 iss_sf,
1130 iss_ar,
1131 0, 0, 0, 0, 0, false);
1132 disas_set_insn_syndrome(s, syn);
1133 }
1134 }
1135
1136 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
1137 TCGv_i64 tcg_addr, MemOp memop,
1138 bool iss_valid,
1139 unsigned int iss_srt,
1140 bool iss_sf, bool iss_ar)
1141 {
1142 do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
1143 iss_valid, iss_srt, iss_sf, iss_ar);
1144 }
1145
1146 /*
1147 * Load from memory to GPR register
1148 */
1149 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1150 MemOp memop, bool extend, int memidx,
1151 bool iss_valid, unsigned int iss_srt,
1152 bool iss_sf, bool iss_ar)
1153 {
1154 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
1155
1156 if (extend && (memop & MO_SIGN)) {
1157 g_assert((memop & MO_SIZE) <= MO_32);
1158 tcg_gen_ext32u_i64(dest, dest);
1159 }
1160
1161 if (iss_valid) {
1162 uint32_t syn;
1163
1164 syn = syn_data_abort_with_iss(0,
1165 (memop & MO_SIZE),
1166 (memop & MO_SIGN) != 0,
1167 iss_srt,
1168 iss_sf,
1169 iss_ar,
1170 0, 0, 0, 0, 0, false);
1171 disas_set_insn_syndrome(s, syn);
1172 }
1173 }
1174
1175 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1176 MemOp memop, bool extend,
1177 bool iss_valid, unsigned int iss_srt,
1178 bool iss_sf, bool iss_ar)
1179 {
1180 do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
1181 iss_valid, iss_srt, iss_sf, iss_ar);
1182 }
1183
1184 /*
1185 * Store from FP register to memory
1186 */
1187 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
1188 {
1189 /* This writes the bottom N bits of a 128 bit wide vector to memory */
1190 TCGv_i64 tmplo = tcg_temp_new_i64();
1191
1192 tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));
1193
1194 if ((mop & MO_SIZE) < MO_128) {
1195 tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1196 } else {
1197 TCGv_i64 tmphi = tcg_temp_new_i64();
1198 TCGv_i128 t16 = tcg_temp_new_i128();
1199
1200 tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
1201 tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
1202
1203 tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
1204 }
1205 }
1206
1207 /*
1208 * Load from memory to FP register
1209 */
1210 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
1211 {
1212 /* This always zero-extends and writes to a full 128 bit wide vector */
1213 TCGv_i64 tmplo = tcg_temp_new_i64();
1214 TCGv_i64 tmphi = NULL;
1215
1216 if ((mop & MO_SIZE) < MO_128) {
1217 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1218 } else {
1219 TCGv_i128 t16 = tcg_temp_new_i128();
1220
1221 tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
1222
1223 tmphi = tcg_temp_new_i64();
1224 tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
1225 }
1226
1227 tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));
1228
1229 if (tmphi) {
1230 tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
1231 }
1232 clear_vec_high(s, tmphi != NULL, destidx);
1233 }
1234
1235 /*
1236 * Vector load/store helpers.
1237 *
1238 * The principal difference between this and a FP load is that we don't
1239 * zero extend as we are filling a partial chunk of the vector register.
1240 * These functions don't support 128 bit loads/stores, which would be
1241 * normal load/store operations.
1242 *
1243 * The _i32 versions are useful when operating on 32 bit quantities
1244 * (eg for floating point single or using Neon helper functions).
1245 */
1246
1247 /* Get value of an element within a vector register */
1248 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1249 int element, MemOp memop)
1250 {
1251 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1252 switch ((unsigned)memop) {
1253 case MO_8:
1254 tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
1255 break;
1256 case MO_16:
1257 tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
1258 break;
1259 case MO_32:
1260 tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
1261 break;
1262 case MO_8|MO_SIGN:
1263 tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
1264 break;
1265 case MO_16|MO_SIGN:
1266 tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
1267 break;
1268 case MO_32|MO_SIGN:
1269 tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
1270 break;
1271 case MO_64:
1272 case MO_64|MO_SIGN:
1273 tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
1274 break;
1275 default:
1276 g_assert_not_reached();
1277 }
1278 }
1279
1280 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1281 int element, MemOp memop)
1282 {
1283 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1284 switch (memop) {
1285 case MO_8:
1286 tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
1287 break;
1288 case MO_16:
1289 tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
1290 break;
1291 case MO_8|MO_SIGN:
1292 tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
1293 break;
1294 case MO_16|MO_SIGN:
1295 tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
1296 break;
1297 case MO_32:
1298 case MO_32|MO_SIGN:
1299 tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
1300 break;
1301 default:
1302 g_assert_not_reached();
1303 }
1304 }
1305
1306 /* Set value of an element within a vector register */
1307 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1308 int element, MemOp memop)
1309 {
1310 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1311 switch (memop) {
1312 case MO_8:
1313 tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
1314 break;
1315 case MO_16:
1316 tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
1317 break;
1318 case MO_32:
1319 tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
1320 break;
1321 case MO_64:
1322 tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
1323 break;
1324 default:
1325 g_assert_not_reached();
1326 }
1327 }
1328
1329 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1330 int destidx, int element, MemOp memop)
1331 {
1332 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1333 switch (memop) {
1334 case MO_8:
1335 tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
1336 break;
1337 case MO_16:
1338 tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
1339 break;
1340 case MO_32:
1341 tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
1342 break;
1343 default:
1344 g_assert_not_reached();
1345 }
1346 }
1347
1348 /* Store from vector register to memory */
1349 static void do_vec_st(DisasContext *s, int srcidx, int element,
1350 TCGv_i64 tcg_addr, MemOp mop)
1351 {
1352 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1353
1354 read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
1355 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1356 }
1357
1358 /* Load from memory to vector register */
1359 static void do_vec_ld(DisasContext *s, int destidx, int element,
1360 TCGv_i64 tcg_addr, MemOp mop)
1361 {
1362 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1363
1364 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1365 write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
1366 }
1367
1368 /* Check that FP/Neon access is enabled. If it is, return
1369 * true. If not, emit code to generate an appropriate exception,
1370 * and return false; the caller should not emit any code for
1371 * the instruction. Note that this check must happen after all
1372 * unallocated-encoding checks (otherwise the syndrome information
1373 * for the resulting exception will be incorrect).
1374 */
1375 static bool fp_access_check_only(DisasContext *s)
1376 {
1377 if (s->fp_excp_el) {
1378 assert(!s->fp_access_checked);
1379 s->fp_access_checked = -1;
1380
1381 gen_exception_insn_el(s, 0, EXCP_UDEF,
1382 syn_fp_access_trap(1, 0xe, false, 0),
1383 s->fp_excp_el);
1384 return false;
1385 }
1386 s->fp_access_checked = 1;
1387 return true;
1388 }
1389
1390 static bool fp_access_check(DisasContext *s)
1391 {
1392 if (!fp_access_check_only(s)) {
1393 return false;
1394 }
1395 if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
1396 gen_exception_insn(s, 0, EXCP_UDEF,
1397 syn_smetrap(SME_ET_Streaming, false));
1398 return false;
1399 }
1400 return true;
1401 }
1402
1403 /*
1404 * Return <0 for non-supported element sizes, with MO_16 controlled by
1405 * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
1406 */
1407 static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz)
1408 {
1409 switch (esz) {
1410 case MO_64:
1411 case MO_32:
1412 break;
1413 case MO_16:
1414 if (!dc_isar_feature(aa64_fp16, s)) {
1415 return -1;
1416 }
1417 break;
1418 default:
1419 return -1;
1420 }
1421 return fp_access_check(s);
1422 }
1423
1424 /* Likewise, but vector MO_64 must have two elements. */
1425 static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
1426 {
1427 switch (esz) {
1428 case MO_64:
1429 if (!is_q) {
1430 return -1;
1431 }
1432 break;
1433 case MO_32:
1434 break;
1435 case MO_16:
1436 if (!dc_isar_feature(aa64_fp16, s)) {
1437 return -1;
1438 }
1439 break;
1440 default:
1441 return -1;
1442 }
1443 return fp_access_check(s);
1444 }
1445
1446 /*
1447 * Check that SVE access is enabled. If it is, return true.
1448 * If not, emit code to generate an appropriate exception and return false.
1449 * This function corresponds to CheckSVEEnabled().
1450 */
1451 bool sve_access_check(DisasContext *s)
1452 {
1453 if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
1454 bool ret;
1455
1456 assert(dc_isar_feature(aa64_sme, s));
1457 ret = sme_sm_enabled_check(s);
1458 s->sve_access_checked = (ret ? 1 : -1);
1459 return ret;
1460 }
1461 if (s->sve_excp_el) {
1462 /* Assert that we only raise one exception per instruction. */
1463 assert(!s->sve_access_checked);
1464 gen_exception_insn_el(s, 0, EXCP_UDEF,
1465 syn_sve_access_trap(), s->sve_excp_el);
1466 s->sve_access_checked = -1;
1467 return false;
1468 }
1469 s->sve_access_checked = 1;
1470 return fp_access_check(s);
1471 }
1472
1473 /*
1474 * Check that SME access is enabled, raise an exception if not.
1475 * Note that this function corresponds to CheckSMEAccess and is
1476 * only used directly for cpregs.
1477 */
1478 static bool sme_access_check(DisasContext *s)
1479 {
1480 if (s->sme_excp_el) {
1481 gen_exception_insn_el(s, 0, EXCP_UDEF,
1482 syn_smetrap(SME_ET_AccessTrap, false),
1483 s->sme_excp_el);
1484 return false;
1485 }
1486 return true;
1487 }
1488
1489 /* This function corresponds to CheckSMEEnabled. */
1490 bool sme_enabled_check(DisasContext *s)
1491 {
1492 /*
1493 * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1494 * to be zero when fp_excp_el has priority. This is because we need
1495 * sme_excp_el by itself for cpregs access checks.
1496 */
1497 if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
1498 bool ret = sme_access_check(s);
1499 s->fp_access_checked = (ret ? 1 : -1);
1500 return ret;
1501 }
1502 return fp_access_check_only(s);
1503 }
1504
1505 /* Common subroutine for CheckSMEAnd*Enabled. */
1506 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
1507 {
1508 if (!sme_enabled_check(s)) {
1509 return false;
1510 }
1511 if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
1512 gen_exception_insn(s, 0, EXCP_UDEF,
1513 syn_smetrap(SME_ET_NotStreaming, false));
1514 return false;
1515 }
1516 if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
1517 gen_exception_insn(s, 0, EXCP_UDEF,
1518 syn_smetrap(SME_ET_InactiveZA, false));
1519 return false;
1520 }
1521 return true;
1522 }
1523
1524 /*
1525 * Expanders for AdvSIMD translation functions.
1526 */
1527
1528 static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
1529 gen_helper_gvec_2 *fn)
1530 {
1531 if (!a->q && a->esz == MO_64) {
1532 return false;
1533 }
1534 if (fp_access_check(s)) {
1535 gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
1536 }
1537 return true;
1538 }
1539
1540 static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
1541 gen_helper_gvec_3 *fn)
1542 {
1543 if (!a->q && a->esz == MO_64) {
1544 return false;
1545 }
1546 if (fp_access_check(s)) {
1547 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
1548 }
1549 return true;
1550 }
1551
1552 static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1553 {
1554 if (!a->q && a->esz == MO_64) {
1555 return false;
1556 }
1557 if (fp_access_check(s)) {
1558 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1559 }
1560 return true;
1561 }
1562
1563 static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1564 {
1565 if (a->esz == MO_64) {
1566 return false;
1567 }
1568 if (fp_access_check(s)) {
1569 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1570 }
1571 return true;
1572 }
1573
1574 static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1575 {
1576 if (a->esz == MO_8) {
1577 return false;
1578 }
1579 return do_gvec_fn3_no64(s, a, fn);
1580 }
1581
1582 static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
1583 {
1584 if (!a->q && a->esz == MO_64) {
1585 return false;
1586 }
1587 if (fp_access_check(s)) {
1588 gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
1589 }
1590 return true;
1591 }
1592
1593 /*
1594 * This utility function is for doing register extension with an
1595 * optional shift. You will likely want to pass a temporary for the
1596 * destination register. See DecodeRegExtend() in the ARM ARM.
1597 */
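/*
 * For example, option == 0b010 (UXTW) with shift == 2 zero-extends the
 * low 32 bits of tcg_in and then shifts the result left by two.
 */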
1598 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1599 int option, unsigned int shift)
1600 {
1601 int extsize = extract32(option, 0, 2);
1602 bool is_signed = extract32(option, 2, 1);
1603
1604 tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
1605 tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1606 }
1607
1608 static inline void gen_check_sp_alignment(DisasContext *s)
1609 {
1610 /* The AArch64 architecture mandates that (if enabled via PSTATE
1611 * or SCTLR bits) there is a check that SP is 16-aligned on every
1612 * SP-relative load or store (with an exception generated if it is not).
1613 * In line with general QEMU practice regarding misaligned accesses,
1614 * we omit these checks for the sake of guest program performance.
1615 * This function is provided as a hook so we can more easily add these
1616 * checks in future (possibly as a "favour catching guest program bugs
1617 * over speed" user selectable option).
1618 */
1619 }
1620
1621 /*
1622 * The instruction disassembly implemented here matches
1623 * the instruction encoding classifications in chapter C4
1624 * of the ARM Architecture Reference Manual (DDI0487B_a);
1625 * classification names and decode diagrams here should generally
1626 * match up with those in the manual.
1627 */
1628
1629 static bool trans_B(DisasContext *s, arg_i *a)
1630 {
1631 reset_btype(s);
1632 gen_goto_tb(s, 0, a->imm);
1633 return true;
1634 }
1635
1636 static bool trans_BL(DisasContext *s, arg_i *a)
1637 {
1638 gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
1639 reset_btype(s);
1640 gen_goto_tb(s, 0, a->imm);
1641 return true;
1642 }
1643
1645 static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1646 {
1647 DisasLabel match;
1648 TCGv_i64 tcg_cmp;
1649
1650 tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1651 reset_btype(s);
1652
1653 match = gen_disas_label(s);
1654 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1655 tcg_cmp, 0, match.label);
1656 gen_goto_tb(s, 0, 4);
1657 set_disas_label(s, match);
1658 gen_goto_tb(s, 1, a->imm);
1659 return true;
1660 }
1661
1662 static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1663 {
1664 DisasLabel match;
1665 TCGv_i64 tcg_cmp;
1666
1667 tcg_cmp = tcg_temp_new_i64();
1668 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1669
1670 reset_btype(s);
1671
1672 match = gen_disas_label(s);
1673 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1674 tcg_cmp, 0, match.label);
1675 gen_goto_tb(s, 0, 4);
1676 set_disas_label(s, match);
1677 gen_goto_tb(s, 1, a->imm);
1678 return true;
1679 }
1680
1681 static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1682 {
1683 /* BC.cond is only present with FEAT_HBC */
1684 if (a->c && !dc_isar_feature(aa64_hbc, s)) {
1685 return false;
1686 }
1687 reset_btype(s);
1688 if (a->cond < 0x0e) {
1689 /* genuinely conditional branches */
1690 DisasLabel match = gen_disas_label(s);
1691 arm_gen_test_cc(a->cond, match.label);
1692 gen_goto_tb(s, 0, 4);
1693 set_disas_label(s, match);
1694 gen_goto_tb(s, 1, a->imm);
1695 } else {
1696 /* 0xe and 0xf are both "always" conditions */
1697 gen_goto_tb(s, 0, a->imm);
1698 }
1699 return true;
1700 }
1701
set_btype_for_br(DisasContext * s,int rn)1702 static void set_btype_for_br(DisasContext *s, int rn)
1703 {
1704 if (dc_isar_feature(aa64_bti, s)) {
1705 /* BR to {x16,x17} or !guard -> 1, else 3. */
1706 if (rn == 16 || rn == 17) {
1707 set_btype(s, 1);
1708 } else {
1709 TCGv_i64 pc = tcg_temp_new_i64();
1710 gen_pc_plus_diff(s, pc, 0);
1711 gen_helper_guarded_page_br(tcg_env, pc);
1712 s->btype = -1;
1713 }
1714 }
1715 }
1716
set_btype_for_blr(DisasContext * s)1717 static void set_btype_for_blr(DisasContext *s)
1718 {
1719 if (dc_isar_feature(aa64_bti, s)) {
1720 /* BLR sets BTYPE to 2, regardless of source guarded page. */
1721 set_btype(s, 2);
1722 }
1723 }
1724
trans_BR(DisasContext * s,arg_r * a)1725 static bool trans_BR(DisasContext *s, arg_r *a)
1726 {
1727 set_btype_for_br(s, a->rn);
1728 gen_a64_set_pc(s, cpu_reg(s, a->rn));
1729 s->base.is_jmp = DISAS_JUMP;
1730 return true;
1731 }
1732
trans_BLR(DisasContext * s,arg_r * a)1733 static bool trans_BLR(DisasContext *s, arg_r *a)
1734 {
1735 TCGv_i64 dst = cpu_reg(s, a->rn);
1736 TCGv_i64 lr = cpu_reg(s, 30);
1737 if (dst == lr) {
1738 TCGv_i64 tmp = tcg_temp_new_i64();
1739 tcg_gen_mov_i64(tmp, dst);
1740 dst = tmp;
1741 }
1742 gen_pc_plus_diff(s, lr, curr_insn_len(s));
1743 gen_a64_set_pc(s, dst);
1744 set_btype_for_blr(s);
1745 s->base.is_jmp = DISAS_JUMP;
1746 return true;
1747 }
1748
trans_RET(DisasContext * s,arg_r * a)1749 static bool trans_RET(DisasContext *s, arg_r *a)
1750 {
1751 gen_a64_set_pc(s, cpu_reg(s, a->rn));
1752 s->base.is_jmp = DISAS_JUMP;
1753 return true;
1754 }
1755
auth_branch_target(DisasContext * s,TCGv_i64 dst,TCGv_i64 modifier,bool use_key_a)1756 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1757 TCGv_i64 modifier, bool use_key_a)
1758 {
1759 TCGv_i64 truedst;
1760 /*
1761 * Return the branch target for a BRAA/RETA/etc, which is either
1762 * just the destination dst, or that value with the pauth check
1763 * done and the code removed from the high bits.
1764 */
1765 if (!s->pauth_active) {
1766 return dst;
1767 }
1768
1769 truedst = tcg_temp_new_i64();
1770 if (use_key_a) {
1771 gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
1772 } else {
1773 gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
1774 }
1775 return truedst;
1776 }
1777
trans_BRAZ(DisasContext * s,arg_braz * a)1778 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1779 {
1780 TCGv_i64 dst;
1781
1782 if (!dc_isar_feature(aa64_pauth, s)) {
1783 return false;
1784 }
1785
1786 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1787 set_btype_for_br(s, a->rn);
1788 gen_a64_set_pc(s, dst);
1789 s->base.is_jmp = DISAS_JUMP;
1790 return true;
1791 }
1792
trans_BLRAZ(DisasContext * s,arg_braz * a)1793 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1794 {
1795 TCGv_i64 dst, lr;
1796
1797 if (!dc_isar_feature(aa64_pauth, s)) {
1798 return false;
1799 }
1800
1801 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1802 lr = cpu_reg(s, 30);
1803 if (dst == lr) {
1804 TCGv_i64 tmp = tcg_temp_new_i64();
1805 tcg_gen_mov_i64(tmp, dst);
1806 dst = tmp;
1807 }
1808 gen_pc_plus_diff(s, lr, curr_insn_len(s));
1809 gen_a64_set_pc(s, dst);
1810 set_btype_for_blr(s);
1811 s->base.is_jmp = DISAS_JUMP;
1812 return true;
1813 }
1814
trans_RETA(DisasContext * s,arg_reta * a)1815 static bool trans_RETA(DisasContext *s, arg_reta *a)
1816 {
1817 TCGv_i64 dst;
1818
1819 dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1820 gen_a64_set_pc(s, dst);
1821 s->base.is_jmp = DISAS_JUMP;
1822 return true;
1823 }
1824
trans_BRA(DisasContext * s,arg_bra * a)1825 static bool trans_BRA(DisasContext *s, arg_bra *a)
1826 {
1827 TCGv_i64 dst;
1828
1829 if (!dc_isar_feature(aa64_pauth, s)) {
1830 return false;
1831 }
1832 dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1833 gen_a64_set_pc(s, dst);
1834 set_btype_for_br(s, a->rn);
1835 s->base.is_jmp = DISAS_JUMP;
1836 return true;
1837 }
1838
trans_BLRA(DisasContext * s,arg_bra * a)1839 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1840 {
1841 TCGv_i64 dst, lr;
1842
1843 if (!dc_isar_feature(aa64_pauth, s)) {
1844 return false;
1845 }
1846 dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1847 lr = cpu_reg(s, 30);
1848 if (dst == lr) {
1849 TCGv_i64 tmp = tcg_temp_new_i64();
1850 tcg_gen_mov_i64(tmp, dst);
1851 dst = tmp;
1852 }
1853 gen_pc_plus_diff(s, lr, curr_insn_len(s));
1854 gen_a64_set_pc(s, dst);
1855 set_btype_for_blr(s);
1856 s->base.is_jmp = DISAS_JUMP;
1857 return true;
1858 }
1859
trans_ERET(DisasContext * s,arg_ERET * a)1860 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1861 {
1862 TCGv_i64 dst;
1863
1864 if (s->current_el == 0) {
1865 return false;
1866 }
1867 if (s->trap_eret) {
1868 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1869 return true;
1870 }
1871 dst = tcg_temp_new_i64();
1872 tcg_gen_ld_i64(dst, tcg_env,
1873 offsetof(CPUARMState, elr_el[s->current_el]));
1874
1875 translator_io_start(&s->base);
1876
1877 gen_helper_exception_return(tcg_env, dst);
1878 /* Must exit loop to check un-masked IRQs */
1879 s->base.is_jmp = DISAS_EXIT;
1880 return true;
1881 }
1882
trans_ERETA(DisasContext * s,arg_reta * a)1883 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1884 {
1885 TCGv_i64 dst;
1886
1887 if (!dc_isar_feature(aa64_pauth, s)) {
1888 return false;
1889 }
1890 if (s->current_el == 0) {
1891 return false;
1892 }
1893 /* The FGT trap takes precedence over an auth trap. */
1894 if (s->trap_eret) {
1895 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1896 return true;
1897 }
1898 dst = tcg_temp_new_i64();
1899 tcg_gen_ld_i64(dst, tcg_env,
1900 offsetof(CPUARMState, elr_el[s->current_el]));
1901
1902 dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1903
1904 translator_io_start(&s->base);
1905
1906 gen_helper_exception_return(tcg_env, dst);
1907 /* Must exit loop to check un-masked IRQs */
1908 s->base.is_jmp = DISAS_EXIT;
1909 return true;
1910 }
1911
trans_NOP(DisasContext * s,arg_NOP * a)1912 static bool trans_NOP(DisasContext *s, arg_NOP *a)
1913 {
1914 return true;
1915 }
1916
trans_YIELD(DisasContext * s,arg_YIELD * a)1917 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1918 {
1919 /*
1920 * When running in MTTCG we don't generate jumps to the yield and
1921 * WFE helpers as it won't affect the scheduling of other vCPUs.
1922 * If we wanted to more completely model WFE/SEV so we don't busy
1923 * spin unnecessarily we would need to do something more involved.
1924 */
1925 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1926 s->base.is_jmp = DISAS_YIELD;
1927 }
1928 return true;
1929 }
1930
trans_WFI(DisasContext * s,arg_WFI * a)1931 static bool trans_WFI(DisasContext *s, arg_WFI *a)
1932 {
1933 s->base.is_jmp = DISAS_WFI;
1934 return true;
1935 }
1936
trans_WFE(DisasContext * s,arg_WFI * a)1937 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1938 {
1939 /*
1940 * When running in MTTCG we don't generate jumps to the yield and
1941 * WFE helpers as it won't affect the scheduling of other vCPUs.
1942 * If we wanted to more completely model WFE/SEV so we don't busy
1943 * spin unnecessarily we would need to do something more involved.
1944 */
1945 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1946 s->base.is_jmp = DISAS_WFE;
1947 }
1948 return true;
1949 }
1950
trans_WFIT(DisasContext * s,arg_WFIT * a)1951 static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
1952 {
1953 if (!dc_isar_feature(aa64_wfxt, s)) {
1954 return false;
1955 }
1956
1957 /*
1958 * Because we need to pass the register value to the helper,
1959 * it's easier to emit the code now, unlike trans_WFI which
1960 * defers it to aarch64_tr_tb_stop(). That means we need to
1961 * check ss_active so that single-stepping a WFIT doesn't halt.
1962 */
1963 if (s->ss_active) {
1964 /* Act like a NOP under architectural singlestep */
1965 return true;
1966 }
1967
1968 gen_a64_update_pc(s, 4);
1969 gen_helper_wfit(tcg_env, cpu_reg(s, a->rd));
1970 /* Go back to the main loop to check for interrupts */
1971 s->base.is_jmp = DISAS_EXIT;
1972 return true;
1973 }
1974
trans_WFET(DisasContext * s,arg_WFET * a)1975 static bool trans_WFET(DisasContext *s, arg_WFET *a)
1976 {
1977 if (!dc_isar_feature(aa64_wfxt, s)) {
1978 return false;
1979 }
1980
1981 /*
1982 * We rely here on our WFE implementation being a NOP, so we
1983 * don't need to do anything different to handle the WFET timeout
1984 * from what trans_WFE does.
1985 */
1986 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1987 s->base.is_jmp = DISAS_WFE;
1988 }
1989 return true;
1990 }
1991
trans_XPACLRI(DisasContext * s,arg_XPACLRI * a)1992 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1993 {
1994 if (s->pauth_active) {
1995 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
1996 }
1997 return true;
1998 }
1999
trans_PACIA1716(DisasContext * s,arg_PACIA1716 * a)2000 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
2001 {
2002 if (s->pauth_active) {
2003 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2004 }
2005 return true;
2006 }
2007
trans_PACIB1716(DisasContext * s,arg_PACIB1716 * a)2008 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
2009 {
2010 if (s->pauth_active) {
2011 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2012 }
2013 return true;
2014 }
2015
trans_AUTIA1716(DisasContext * s,arg_AUTIA1716 * a)2016 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
2017 {
2018 if (s->pauth_active) {
2019 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2020 }
2021 return true;
2022 }
2023
trans_AUTIB1716(DisasContext * s,arg_AUTIB1716 * a)2024 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
2025 {
2026 if (s->pauth_active) {
2027 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2028 }
2029 return true;
2030 }
2031
trans_ESB(DisasContext * s,arg_ESB * a)2032 static bool trans_ESB(DisasContext *s, arg_ESB *a)
2033 {
2034 /* Without RAS, we must implement this as a NOP. */
2035 if (dc_isar_feature(aa64_ras, s)) {
2036 /*
2037 * QEMU does not have a source of physical SErrors,
2038 * so we are only concerned with virtual SErrors.
2039 * The pseudocode in the ARM for this case is
2040 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
2041 * AArch64.vESBOperation();
2042 * Most of the condition can be evaluated at translation time.
2043 * Test for EL2 present, and defer test for SEL2 to runtime.
2044 */
2045 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
2046 gen_helper_vesb(tcg_env);
2047 }
2048 }
2049 return true;
2050 }
2051
trans_PACIAZ(DisasContext * s,arg_PACIAZ * a)2052 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
2053 {
2054 if (s->pauth_active) {
2055 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2056 }
2057 return true;
2058 }
2059
trans_PACIASP(DisasContext * s,arg_PACIASP * a)2060 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
2061 {
2062 if (s->pauth_active) {
2063 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2064 }
2065 return true;
2066 }
2067
trans_PACIBZ(DisasContext * s,arg_PACIBZ * a)2068 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
2069 {
2070 if (s->pauth_active) {
2071 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2072 }
2073 return true;
2074 }
2075
trans_PACIBSP(DisasContext * s,arg_PACIBSP * a)2076 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
2077 {
2078 if (s->pauth_active) {
2079 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2080 }
2081 return true;
2082 }
2083
trans_AUTIAZ(DisasContext * s,arg_AUTIAZ * a)2084 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
2085 {
2086 if (s->pauth_active) {
2087 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2088 }
2089 return true;
2090 }
2091
trans_AUTIASP(DisasContext * s,arg_AUTIASP * a)2092 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
2093 {
2094 if (s->pauth_active) {
2095 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2096 }
2097 return true;
2098 }
2099
trans_AUTIBZ(DisasContext * s,arg_AUTIBZ * a)2100 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
2101 {
2102 if (s->pauth_active) {
2103 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2104 }
2105 return true;
2106 }
2107
trans_AUTIBSP(DisasContext * s,arg_AUTIBSP * a)2108 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
2109 {
2110 if (s->pauth_active) {
2111 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2112 }
2113 return true;
2114 }
2115
trans_CLREX(DisasContext * s,arg_CLREX * a)2116 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
2117 {
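    /*
     * -1 is used as the "no outstanding exclusive access" marker;
     * gen_store_exclusive() below resets cpu_exclusive_addr the same way
     * after every store-exclusive.
     */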
2118 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2119 return true;
2120 }
2121
trans_DSB_DMB(DisasContext * s,arg_DSB_DMB * a)2122 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
2123 {
2124 /* We handle DSB and DMB the same way */
2125 TCGBar bar;
2126
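    /*
     * MBReqTypes_Reads must order prior loads against both later loads
     * and later stores, whereas MBReqTypes_Writes only orders stores
     * against later stores; hence the asymmetric TCG_MO_* sets below.
     */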
2127 switch (a->types) {
2128 case 1: /* MBReqTypes_Reads */
2129 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
2130 break;
2131 case 2: /* MBReqTypes_Writes */
2132 bar = TCG_BAR_SC | TCG_MO_ST_ST;
2133 break;
2134 default: /* MBReqTypes_All */
2135 bar = TCG_BAR_SC | TCG_MO_ALL;
2136 break;
2137 }
2138 tcg_gen_mb(bar);
2139 return true;
2140 }
2141
trans_DSB_nXS(DisasContext * s,arg_DSB_nXS * a)2142 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a)
2143 {
2144 if (!dc_isar_feature(aa64_xs, s)) {
2145 return false;
2146 }
2147 tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
2148 return true;
2149 }
2150
trans_ISB(DisasContext * s,arg_ISB * a)2151 static bool trans_ISB(DisasContext *s, arg_ISB *a)
2152 {
2153 /*
2154 * We need to break the TB after this insn to execute
2155 * self-modifying code correctly and also to take
2156 * any pending interrupts immediately.
2157 */
2158 reset_btype(s);
2159 gen_goto_tb(s, 0, 4);
2160 return true;
2161 }
2162
trans_SB(DisasContext * s,arg_SB * a)2163 static bool trans_SB(DisasContext *s, arg_SB *a)
2164 {
2165 if (!dc_isar_feature(aa64_sb, s)) {
2166 return false;
2167 }
2168 /*
2169 * TODO: There is no speculation barrier opcode for TCG;
2170 * MB and end the TB instead.
2171 */
2172 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
2173 gen_goto_tb(s, 0, 4);
2174 return true;
2175 }
2176
trans_CFINV(DisasContext * s,arg_CFINV * a)2177 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
2178 {
2179 if (!dc_isar_feature(aa64_condm_4, s)) {
2180 return false;
2181 }
2182 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
2183 return true;
2184 }
2185
trans_XAFLAG(DisasContext * s,arg_XAFLAG * a)2186 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
2187 {
2188 TCGv_i32 z;
2189
2190 if (!dc_isar_feature(aa64_condm_5, s)) {
2191 return false;
2192 }
2193
2194 z = tcg_temp_new_i32();
2195
2196 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
2197
2198 /*
2199 * (!C & !Z) << 31
2200 * (!(C | Z)) << 31
2201 * ~((C | Z) << 31)
2202 * ~-(C | Z)
2203 * (C | Z) - 1
2204 */
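    /*
     * Only bit 31 of cpu_NF is architecturally significant (it holds the
     * N flag), so the rewrites above need only agree on that bit, and
     * (C | Z) - 1 does.
     */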
2205 tcg_gen_or_i32(cpu_NF, cpu_CF, z);
2206 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
2207
2208 /* !(Z & C) */
2209 tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
2210 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
2211
2212 /* (!C & Z) << 31 -> -(Z & ~C) */
2213 tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
2214 tcg_gen_neg_i32(cpu_VF, cpu_VF);
2215
2216 /* C | Z */
2217 tcg_gen_or_i32(cpu_CF, cpu_CF, z);
2218
2219 return true;
2220 }
2221
trans_AXFLAG(DisasContext * s,arg_AXFLAG * a)2222 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
2223 {
2224 if (!dc_isar_feature(aa64_condm_5, s)) {
2225 return false;
2226 }
2227
2228 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */
2229 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */
2230
2231 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
2232 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
2233
2234 tcg_gen_movi_i32(cpu_NF, 0);
2235 tcg_gen_movi_i32(cpu_VF, 0);
2236
2237 return true;
2238 }
2239
trans_MSR_i_UAO(DisasContext * s,arg_i * a)2240 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
2241 {
2242 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
2243 return false;
2244 }
2245 if (a->imm & 1) {
2246 set_pstate_bits(PSTATE_UAO);
2247 } else {
2248 clear_pstate_bits(PSTATE_UAO);
2249 }
2250 gen_rebuild_hflags(s);
2251 s->base.is_jmp = DISAS_TOO_MANY;
2252 return true;
2253 }
2254
trans_MSR_i_PAN(DisasContext * s,arg_i * a)2255 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
2256 {
2257 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
2258 return false;
2259 }
2260 if (a->imm & 1) {
2261 set_pstate_bits(PSTATE_PAN);
2262 } else {
2263 clear_pstate_bits(PSTATE_PAN);
2264 }
2265 gen_rebuild_hflags(s);
2266 s->base.is_jmp = DISAS_TOO_MANY;
2267 return true;
2268 }
2269
trans_MSR_i_SPSEL(DisasContext * s,arg_i * a)2270 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
2271 {
2272 if (s->current_el == 0) {
2273 return false;
2274 }
2275 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
2276 s->base.is_jmp = DISAS_TOO_MANY;
2277 return true;
2278 }
2279
trans_MSR_i_SBSS(DisasContext * s,arg_i * a)2280 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
2281 {
2282 if (!dc_isar_feature(aa64_ssbs, s)) {
2283 return false;
2284 }
2285 if (a->imm & 1) {
2286 set_pstate_bits(PSTATE_SSBS);
2287 } else {
2288 clear_pstate_bits(PSTATE_SSBS);
2289 }
2290 /* Don't need to rebuild hflags since SSBS is a nop */
2291 s->base.is_jmp = DISAS_TOO_MANY;
2292 return true;
2293 }
2294
trans_MSR_i_DIT(DisasContext * s,arg_i * a)2295 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
2296 {
2297 if (!dc_isar_feature(aa64_dit, s)) {
2298 return false;
2299 }
2300 if (a->imm & 1) {
2301 set_pstate_bits(PSTATE_DIT);
2302 } else {
2303 clear_pstate_bits(PSTATE_DIT);
2304 }
2305 /* There's no need to rebuild hflags because DIT is a nop */
2306 s->base.is_jmp = DISAS_TOO_MANY;
2307 return true;
2308 }
2309
trans_MSR_i_TCO(DisasContext * s,arg_i * a)2310 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2311 {
2312 if (dc_isar_feature(aa64_mte, s)) {
2313 /* Full MTE is enabled -- set the TCO bit as directed. */
2314 if (a->imm & 1) {
2315 set_pstate_bits(PSTATE_TCO);
2316 } else {
2317 clear_pstate_bits(PSTATE_TCO);
2318 }
2319 gen_rebuild_hflags(s);
2320 /* Many factors, including TCO, go into MTE_ACTIVE. */
2321 s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2322 return true;
2323 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2324 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */
2325 return true;
2326 } else {
2327 /* Insn not present */
2328 return false;
2329 }
2330 }
2331
trans_MSR_i_DAIFSET(DisasContext * s,arg_i * a)2332 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2333 {
2334 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2335 s->base.is_jmp = DISAS_TOO_MANY;
2336 return true;
2337 }
2338
trans_MSR_i_DAIFCLEAR(DisasContext * s,arg_i * a)2339 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2340 {
2341 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2342 /* Exit the cpu loop to re-evaluate pending IRQs. */
2343 s->base.is_jmp = DISAS_UPDATE_EXIT;
2344 return true;
2345 }
2346
trans_MSR_i_ALLINT(DisasContext * s,arg_i * a)2347 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a)
2348 {
2349 if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) {
2350 return false;
2351 }
2352
2353 if (a->imm == 0) {
2354 clear_pstate_bits(PSTATE_ALLINT);
2355 } else if (s->current_el > 1) {
2356 set_pstate_bits(PSTATE_ALLINT);
2357 } else {
2358 gen_helper_msr_set_allint_el1(tcg_env);
2359 }
2360
2361 /* Exit the cpu loop to re-evaluate pending IRQs. */
2362 s->base.is_jmp = DISAS_UPDATE_EXIT;
2363 return true;
2364 }
2365
trans_MSR_i_SVCR(DisasContext * s,arg_MSR_i_SVCR * a)2366 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2367 {
2368 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2369 return false;
2370 }
2371 if (sme_access_check(s)) {
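        /*
         * a->imm is 0 or 1; multiplying by 3 replicates it into both the
         * PSTATE.SM (bit 0) and PSTATE.ZA (bit 1) positions before the
         * mask selects which of the two to change.
         */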
2372 int old = s->pstate_sm | (s->pstate_za << 1);
2373 int new = a->imm * 3;
2374
2375 if ((old ^ new) & a->mask) {
2376 /* At least one bit changes. */
2377 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2378 tcg_constant_i32(a->mask));
2379 s->base.is_jmp = DISAS_TOO_MANY;
2380 }
2381 }
2382 return true;
2383 }
2384
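/*
 * QEMU keeps the A64 flags in split form: cpu_NF holds N in bit 31,
 * cpu_ZF is zero iff Z is set, cpu_CF holds C in bit 0, and cpu_VF holds
 * V in bit 31. The two helpers below convert between that form and the
 * architectural NZCV layout in bits [31:28].
 */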
gen_get_nzcv(TCGv_i64 tcg_rt)2385 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2386 {
2387 TCGv_i32 tmp = tcg_temp_new_i32();
2388 TCGv_i32 nzcv = tcg_temp_new_i32();
2389
2390 /* build bit 31, N */
2391 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2392 /* build bit 30, Z */
2393 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2394 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2395 /* build bit 29, C */
2396 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2397 /* build bit 28, V */
2398 tcg_gen_shri_i32(tmp, cpu_VF, 31);
2399 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2400 /* generate result */
2401 tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2402 }
2403
gen_set_nzcv(TCGv_i64 tcg_rt)2404 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2405 {
2406 TCGv_i32 nzcv = tcg_temp_new_i32();
2407
2408 /* take NZCV from R[t] */
2409 tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2410
2411 /* bit 31, N */
2412 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2413 /* bit 30, Z */
2414 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2415 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2416 /* bit 29, C */
2417 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2418 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2419 /* bit 28, V */
2420 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2421 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2422 }
2423
gen_sysreg_undef(DisasContext * s,bool isread,uint8_t op0,uint8_t op1,uint8_t op2,uint8_t crn,uint8_t crm,uint8_t rt)2424 static void gen_sysreg_undef(DisasContext *s, bool isread,
2425 uint8_t op0, uint8_t op1, uint8_t op2,
2426 uint8_t crn, uint8_t crm, uint8_t rt)
2427 {
2428 /*
2429 * Generate code to emit an UNDEF with correct syndrome
2430 * information for a failed system register access.
2431 * This is EC_UNCATEGORIZED (i.e. a standard UNDEF) in most cases,
2432 * but if FEAT_IDST is implemented then read accesses to registers
2433 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2434 * syndrome.
2435 */
2436 uint32_t syndrome;
2437
2438 if (isread && dc_isar_feature(aa64_ids, s) &&
2439 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2440 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2441 } else {
2442 syndrome = syn_uncategorized();
2443 }
2444 gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2445 }
2446
2447 /* MRS - move from system register
2448 * MSR (register) - move to system register
2449 * SYS
2450 * SYSL
2451 * These are all essentially the same insn in 'read' and 'write'
2452 * versions, with varying op0 fields.
2453 */
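/*
 * For example, "MRS x0, MIDR_EL1" reaches handle_sys() with isread true,
 * op0 == 3, op1 == 0, crn == 0, crm == 0, op2 == 0 and rt == 0.
 */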
handle_sys(DisasContext * s,bool isread,unsigned int op0,unsigned int op1,unsigned int op2,unsigned int crn,unsigned int crm,unsigned int rt)2454 static void handle_sys(DisasContext *s, bool isread,
2455 unsigned int op0, unsigned int op1, unsigned int op2,
2456 unsigned int crn, unsigned int crm, unsigned int rt)
2457 {
2458 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2459 crn, crm, op0, op1, op2);
2460 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2461 bool need_exit_tb = false;
2462 bool nv_trap_to_el2 = false;
2463 bool nv_redirect_reg = false;
2464 bool skip_fp_access_checks = false;
2465 bool nv2_mem_redirect = false;
2466 TCGv_ptr tcg_ri = NULL;
2467 TCGv_i64 tcg_rt;
2468 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2469
2470 if (crn == 11 || crn == 15) {
2471 /*
2472 * Check for TIDCP trap, which must take precedence over
2473 * the UNDEF for "no such register" etc.
2474 */
2475 switch (s->current_el) {
2476 case 0:
2477 if (dc_isar_feature(aa64_tidcp1, s)) {
2478 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
2479 }
2480 break;
2481 case 1:
2482 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
2483 break;
2484 }
2485 }
2486
2487 if (!ri) {
2488 /* Unknown register; this might be a guest error or a feature
2489 * that QEMU does not implement.
2490 */
2491 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2492 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2493 isread ? "read" : "write", op0, op1, crn, crm, op2);
2494 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2495 return;
2496 }
2497
2498 if (s->nv2 && ri->nv2_redirect_offset) {
2499 /*
2500 * Some registers always redirect to memory; some only do so if
2501 * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
2502 * pairs which share an offset; see the table in R_CSRPQ).
2503 */
2504 if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
2505 nv2_mem_redirect = s->nv1;
2506 } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
2507 nv2_mem_redirect = !s->nv1;
2508 } else {
2509 nv2_mem_redirect = true;
2510 }
2511 }
2512
2513 /* Check access permissions */
2514 if (!cp_access_ok(s->current_el, ri, isread)) {
2515 /*
2516 * FEAT_NV/NV2 handling does not do the usual FP access checks
2517 * for registers only accessible at EL2 (though it *does* do them
2518 * for registers accessible at EL1).
2519 */
2520 skip_fp_access_checks = true;
2521 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
2522 /*
2523 * This is one of the few EL2 registers which should redirect
2524 * to the equivalent EL1 register. We do that after running
2525 * the EL2 register's accessfn.
2526 */
2527 nv_redirect_reg = true;
2528 assert(!nv2_mem_redirect);
2529 } else if (nv2_mem_redirect) {
2530 /*
2531 * NV2 redirect-to-memory takes precedence over trap to EL2 or
2532 * UNDEF to EL1.
2533 */
2534 } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
2535 /*
2536 * This register / instruction exists and is an EL2 register, so
2537 * we must trap to EL2 if accessed in nested virtualization EL1
2538 * instead of UNDEFing. We'll do that after the usual access checks.
2539 * (This makes a difference only for a couple of registers like
2540 * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
2541 * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
2542 * an accessfn which does nothing when called from EL1, because
2543 * the trap-to-EL3 controls which would apply to that register
2544 * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
2545 */
2546 nv_trap_to_el2 = true;
2547 } else {
2548 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2549 return;
2550 }
2551 }
2552
2553 if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2554 /* Emit code to perform further access permissions checks at
2555 * runtime; this may result in an exception.
2556 */
2557 gen_a64_update_pc(s, 0);
2558 tcg_ri = tcg_temp_new_ptr();
2559 gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
2560 tcg_constant_i32(key),
2561 tcg_constant_i32(syndrome),
2562 tcg_constant_i32(isread));
2563 } else if (ri->type & ARM_CP_RAISES_EXC) {
2564 /*
2565 * The readfn or writefn might raise an exception;
2566 * synchronize the CPU state in case it does.
2567 */
2568 gen_a64_update_pc(s, 0);
2569 }
2570
2571 if (!skip_fp_access_checks) {
2572 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2573 return;
2574 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2575 return;
2576 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2577 return;
2578 }
2579 }
2580
2581 if (nv_trap_to_el2) {
2582 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2583 return;
2584 }
2585
2586 if (nv_redirect_reg) {
2587 /*
2588 * FEAT_NV2 redirection of an EL2 register to an EL1 register.
2589 * Conveniently in all cases the encoding of the EL1 register is
2590 * identical to the EL2 register except that opc1 is 0.
2591 * Get the reginfo for the EL1 register to use for the actual access.
2592 * We don't use the EL1 register's access function, and
2593 * fine-grained-traps on EL1 also do not apply here.
2594 */
2595 key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2596 crn, crm, op0, 0, op2);
2597 ri = get_arm_cp_reginfo(s->cp_regs, key);
2598 assert(ri);
2599 assert(cp_access_ok(s->current_el, ri, isread));
2600 /*
2601 * We might not have done an update_pc earlier, so check we don't
2602 * need it. We could support this in future if necessary.
2603 */
2604 assert(!(ri->type & ARM_CP_RAISES_EXC));
2605 }
2606
2607 if (nv2_mem_redirect) {
2608 /*
2609 * This system register is being redirected into an EL2 memory access.
2610 * This means it is not an IO operation, doesn't change hflags,
2611 * and need not end the TB, because it has no side effects.
2612 *
2613 * The access is 64-bit single copy atomic, guaranteed aligned because
2614 * of the definition of VNCR_EL2. Its endianness depends on
2615 * SCTLR_EL2.EE, not on the data endianness of EL1.
2616 * It is done under either the EL2 translation regime or the EL2&0
2617 * translation regime, depending on HCR_EL2.E2H. It behaves as if
2618 * PSTATE.PAN is 0.
2619 */
2620 TCGv_i64 ptr = tcg_temp_new_i64();
2621 MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
2622 ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
2623 int memidx = arm_to_core_mmu_idx(armmemidx);
2624 uint32_t syn;
2625
2626 mop |= (s->nv2_mem_be ? MO_BE : MO_LE);
2627
2628 tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
2629 tcg_gen_addi_i64(ptr, ptr,
2630 (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
2631 tcg_rt = cpu_reg(s, rt);
2632
2633 syn = syn_data_abort_vncr(0, !isread, 0);
2634 disas_set_insn_syndrome(s, syn);
2635 if (isread) {
2636 tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
2637 } else {
2638 tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
2639 }
2640 return;
2641 }
2642
2643 /* Handle special cases first */
2644 switch (ri->type & ARM_CP_SPECIAL_MASK) {
2645 case 0:
2646 break;
2647 case ARM_CP_NOP:
2648 return;
2649 case ARM_CP_NZCV:
2650 tcg_rt = cpu_reg(s, rt);
2651 if (isread) {
2652 gen_get_nzcv(tcg_rt);
2653 } else {
2654 gen_set_nzcv(tcg_rt);
2655 }
2656 return;
2657 case ARM_CP_CURRENTEL:
2658 {
2659 /*
2660 * Reads as current EL value from pstate, which is
2661 * guaranteed to be constant by the tb flags.
2662 * For nested virt we should report EL2.
2663 */
2664 int el = s->nv ? 2 : s->current_el;
2665 tcg_rt = cpu_reg(s, rt);
2666 tcg_gen_movi_i64(tcg_rt, el << 2);
2667 return;
2668 }
2669 case ARM_CP_DC_ZVA:
2670 /* Writes clear the aligned block of memory which rt points into. */
2671 if (s->mte_active[0]) {
2672 int desc = 0;
2673
2674 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2675 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2676 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2677
2678 tcg_rt = tcg_temp_new_i64();
2679 gen_helper_mte_check_zva(tcg_rt, tcg_env,
2680 tcg_constant_i32(desc), cpu_reg(s, rt));
2681 } else {
2682 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2683 }
2684 gen_helper_dc_zva(tcg_env, tcg_rt);
2685 return;
2686 case ARM_CP_DC_GVA:
2687 {
2688 TCGv_i64 clean_addr, tag;
2689
2690 /*
2691 * DC_GVA, like DC_ZVA, requires that we supply the original
2692 * pointer for an invalid page. Probe that address first.
2693 */
2694 tcg_rt = cpu_reg(s, rt);
2695 clean_addr = clean_data_tbi(s, tcg_rt);
2696 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2697
2698 if (s->ata[0]) {
2699 /* Extract the tag from the register to match STZGM. */
2700 tag = tcg_temp_new_i64();
2701 tcg_gen_shri_i64(tag, tcg_rt, 56);
2702 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2703 }
2704 }
2705 return;
2706 case ARM_CP_DC_GZVA:
2707 {
2708 TCGv_i64 clean_addr, tag;
2709
2710 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2711 tcg_rt = cpu_reg(s, rt);
2712 clean_addr = clean_data_tbi(s, tcg_rt);
2713 gen_helper_dc_zva(tcg_env, clean_addr);
2714
2715 if (s->ata[0]) {
2716 /* Extract the tag from the register to match STZGM. */
2717 tag = tcg_temp_new_i64();
2718 tcg_gen_shri_i64(tag, tcg_rt, 56);
2719 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2720 }
2721 }
2722 return;
2723 default:
2724 g_assert_not_reached();
2725 }
2726
2727 if (ri->type & ARM_CP_IO) {
2728 /* I/O operations must end the TB here (whether read or write) */
2729 need_exit_tb = translator_io_start(&s->base);
2730 }
2731
2732 tcg_rt = cpu_reg(s, rt);
2733
2734 if (isread) {
2735 if (ri->type & ARM_CP_CONST) {
2736 tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2737 } else if (ri->readfn) {
2738 if (!tcg_ri) {
2739 tcg_ri = gen_lookup_cp_reg(key);
2740 }
2741 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
2742 } else {
2743 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
2744 }
2745 } else {
2746 if (ri->type & ARM_CP_CONST) {
2747 /* If not forbidden by access permissions, treat as WI */
2748 return;
2749 } else if (ri->writefn) {
2750 if (!tcg_ri) {
2751 tcg_ri = gen_lookup_cp_reg(key);
2752 }
2753 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
2754 } else {
2755 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
2756 }
2757 }
2758
2759 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2760 /*
2761 * A write to any coprocessor register that ends a TB
2762 * must rebuild the hflags for the next TB.
2763 */
2764 gen_rebuild_hflags(s);
2765 /*
2766 * We default to ending the TB on a coprocessor register write,
2767 * but allow this to be suppressed by the register definition
2768 * (usually only necessary to work around guest bugs).
2769 */
2770 need_exit_tb = true;
2771 }
2772 if (need_exit_tb) {
2773 s->base.is_jmp = DISAS_UPDATE_EXIT;
2774 }
2775 }
2776
trans_SYS(DisasContext * s,arg_SYS * a)2777 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2778 {
2779 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2780 return true;
2781 }
2782
trans_SVC(DisasContext * s,arg_i * a)2783 static bool trans_SVC(DisasContext *s, arg_i *a)
2784 {
2785 /*
2786 * For SVC, HVC and SMC we advance the single-step state
2787 * machine before taking the exception. This is architecturally
2788 * mandated, to ensure that single-stepping a system call
2789 * instruction works properly.
2790 */
2791 uint32_t syndrome = syn_aa64_svc(a->imm);
2792 if (s->fgt_svc) {
2793 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2794 return true;
2795 }
2796 gen_ss_advance(s);
2797 gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2798 return true;
2799 }
2800
trans_HVC(DisasContext * s,arg_i * a)2801 static bool trans_HVC(DisasContext *s, arg_i *a)
2802 {
2803 int target_el = s->current_el == 3 ? 3 : 2;
2804
2805 if (s->current_el == 0) {
2806 unallocated_encoding(s);
2807 return true;
2808 }
2809 /*
2810 * The pre HVC helper handles cases when HVC gets trapped
2811 * as an undefined insn by runtime configuration.
2812 */
2813 gen_a64_update_pc(s, 0);
2814 gen_helper_pre_hvc(tcg_env);
2815 /* Architecture requires ss advance before we do the actual work */
2816 gen_ss_advance(s);
2817 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2818 return true;
2819 }
2820
trans_SMC(DisasContext * s,arg_i * a)2821 static bool trans_SMC(DisasContext *s, arg_i *a)
2822 {
2823 if (s->current_el == 0) {
2824 unallocated_encoding(s);
2825 return true;
2826 }
2827 gen_a64_update_pc(s, 0);
2828 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2829 /* Architecture requires ss advance before we do the actual work */
2830 gen_ss_advance(s);
2831 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2832 return true;
2833 }
2834
trans_BRK(DisasContext * s,arg_i * a)2835 static bool trans_BRK(DisasContext *s, arg_i *a)
2836 {
2837 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2838 return true;
2839 }
2840
trans_HLT(DisasContext * s,arg_i * a)2841 static bool trans_HLT(DisasContext *s, arg_i *a)
2842 {
2843 /*
2844 * HLT. This has two purposes.
2845 * Architecturally, it is an external halting debug instruction.
2846 * Since QEMU doesn't implement external debug, we treat this as
2847 * the architecture requires when halting debug is disabled: it will UNDEF.
2848 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2849 */
2850 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2851 gen_exception_internal_insn(s, EXCP_SEMIHOST);
2852 } else {
2853 unallocated_encoding(s);
2854 }
2855 return true;
2856 }
2857
2858 /*
2859 * Load/Store exclusive instructions are implemented by remembering
2860 * the value/address loaded, and seeing if these are the same
2861 * when the store is performed. This is not actually the architecturally
2862 * mandated semantics, but it works for typical guest code sequences
2863 * and avoids having to monitor regular stores.
2864 *
2865 * The store exclusive uses the atomic cmpxchg primitives to avoid
2866 * races in multi-threaded linux-user and when MTTCG softmmu is
2867 * enabled.
2868 */
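/*
 * A typical guest sequence that this scheme handles correctly:
 *
 *     retry: ldaxr   w1, [x0]
 *            add     w1, w1, #1
 *            stlxr   w2, w1, [x0]
 *            cbnz    w2, retry
 *
 * The store-exclusive succeeds (writes 0 to w2) only if the address still
 * matches cpu_exclusive_addr and memory still holds cpu_exclusive_val.
 */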
gen_load_exclusive(DisasContext * s,int rt,int rt2,int rn,int size,bool is_pair)2869 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2870 int size, bool is_pair)
2871 {
2872 int idx = get_mem_index(s);
2873 TCGv_i64 dirty_addr, clean_addr;
2874 MemOp memop = check_atomic_align(s, rn, size + is_pair);
2875
2876 s->is_ldex = true;
2877 dirty_addr = cpu_reg_sp(s, rn);
2878 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2879
2880 g_assert(size <= 3);
2881 if (is_pair) {
2882 g_assert(size >= 2);
2883 if (size == 2) {
2884 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2885 if (s->be_data == MO_LE) {
2886 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2887 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2888 } else {
2889 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2890 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2891 }
2892 } else {
2893 TCGv_i128 t16 = tcg_temp_new_i128();
2894
2895 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2896
2897 if (s->be_data == MO_LE) {
2898 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2899 cpu_exclusive_high, t16);
2900 } else {
2901 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2902 cpu_exclusive_val, t16);
2903 }
2904 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2905 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2906 }
2907 } else {
2908 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2909 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2910 }
2911 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2912 }
2913
gen_store_exclusive(DisasContext * s,int rd,int rt,int rt2,int rn,int size,int is_pair)2914 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2915 int rn, int size, int is_pair)
2916 {
2917 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2918 * && (!is_pair || env->exclusive_high == [addr + datasize])) {
2919 * [addr] = {Rt};
2920 * if (is_pair) {
2921 * [addr + datasize] = {Rt2};
2922 * }
2923 * {Rd} = 0;
2924 * } else {
2925 * {Rd} = 1;
2926 * }
2927 * env->exclusive_addr = -1;
2928 */
2929 TCGLabel *fail_label = gen_new_label();
2930 TCGLabel *done_label = gen_new_label();
2931 TCGv_i64 tmp, clean_addr;
2932 MemOp memop;
2933
2934 /*
2935 * FIXME: We are out of spec here. We have recorded only the address
2936 * from load_exclusive, not the entire range, and we assume that the
2937 * sizes of the accesses on both sides match. The architecture allows the
2938 * store to be smaller than the load, so long as the stored bytes are
2939 * within the range recorded by the load.
2940 */
2941
2942 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
2943 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2944 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2945
2946 /*
2947 * The write, and any associated faults, only happen if the virtual
2948 * and physical addresses pass the exclusive monitor check. These
2949 * faults are exceedingly unlikely, because normally the guest uses
2950 * the exact same address register for the load_exclusive, and we
2951 * would have recognized these faults there.
2952 *
2953 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
2954 * unaligned 4-byte write within the range of an aligned 8-byte load.
2955 * With LSE2, the store would need to cross a 16-byte boundary when the
2956 * load did not, which would mean the store is outside the range
2957 * recorded for the monitor, which would have failed a corrected monitor
2958 * check above. For now, we assume no size change and retain the
2959 * MO_ALIGN to let tcg know what we checked in the load_exclusive.
2960 *
2961 * It is possible to trigger an MTE fault, by performing the load with
2962 * a virtual address with a valid tag and performing the store with the
2963 * same virtual address and a different invalid tag.
2964 */
2965 memop = size + is_pair;
2966 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
2967 memop |= MO_ALIGN;
2968 }
2969 memop = finalize_memop(s, memop);
2970 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2971
2972 tmp = tcg_temp_new_i64();
2973 if (is_pair) {
2974 if (size == 2) {
2975 if (s->be_data == MO_LE) {
2976 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2977 } else {
2978 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2979 }
2980 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2981 cpu_exclusive_val, tmp,
2982 get_mem_index(s), memop);
2983 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2984 } else {
2985 TCGv_i128 t16 = tcg_temp_new_i128();
2986 TCGv_i128 c16 = tcg_temp_new_i128();
2987 TCGv_i64 a, b;
2988
2989 if (s->be_data == MO_LE) {
2990 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2991 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2992 cpu_exclusive_high);
2993 } else {
2994 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2995 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2996 cpu_exclusive_val);
2997 }
2998
2999 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
3000 get_mem_index(s), memop);
3001
3002 a = tcg_temp_new_i64();
3003 b = tcg_temp_new_i64();
3004 if (s->be_data == MO_LE) {
3005 tcg_gen_extr_i128_i64(a, b, t16);
3006 } else {
3007 tcg_gen_extr_i128_i64(b, a, t16);
3008 }
3009
3010 tcg_gen_xor_i64(a, a, cpu_exclusive_val);
3011 tcg_gen_xor_i64(b, b, cpu_exclusive_high);
3012 tcg_gen_or_i64(tmp, a, b);
3013
3014 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
3015 }
3016 } else {
3017 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
3018 cpu_reg(s, rt), get_mem_index(s), memop);
3019 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
3020 }
3021 tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
3022 tcg_gen_br(done_label);
3023
3024 gen_set_label(fail_label);
3025 tcg_gen_movi_i64(cpu_reg(s, rd), 1);
3026 gen_set_label(done_label);
3027 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
3028 }
3029
gen_compare_and_swap(DisasContext * s,int rs,int rt,int rn,int size)3030 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
3031 int rn, int size)
3032 {
3033 TCGv_i64 tcg_rs = cpu_reg(s, rs);
3034 TCGv_i64 tcg_rt = cpu_reg(s, rt);
3035 int memidx = get_mem_index(s);
3036 TCGv_i64 clean_addr;
3037 MemOp memop;
3038
3039 if (rn == 31) {
3040 gen_check_sp_alignment(s);
3041 }
3042 memop = check_atomic_align(s, rn, size);
3043 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3044 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
3045 memidx, memop);
3046 }
3047
gen_compare_and_swap_pair(DisasContext * s,int rs,int rt,int rn,int size)3048 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
3049 int rn, int size)
3050 {
3051 TCGv_i64 s1 = cpu_reg(s, rs);
3052 TCGv_i64 s2 = cpu_reg(s, rs + 1);
3053 TCGv_i64 t1 = cpu_reg(s, rt);
3054 TCGv_i64 t2 = cpu_reg(s, rt + 1);
3055 TCGv_i64 clean_addr;
3056 int memidx = get_mem_index(s);
3057 MemOp memop;
3058
3059 if (rn == 31) {
3060 gen_check_sp_alignment(s);
3061 }
3062
3063 /* This is a single atomic access, despite the "pair". */
3064 memop = check_atomic_align(s, rn, size + 1);
3065 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3066
3067 if (size == 2) {
3068 TCGv_i64 cmp = tcg_temp_new_i64();
3069 TCGv_i64 val = tcg_temp_new_i64();
3070
3071 if (s->be_data == MO_LE) {
3072 tcg_gen_concat32_i64(val, t1, t2);
3073 tcg_gen_concat32_i64(cmp, s1, s2);
3074 } else {
3075 tcg_gen_concat32_i64(val, t2, t1);
3076 tcg_gen_concat32_i64(cmp, s2, s1);
3077 }
3078
3079 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
3080
3081 if (s->be_data == MO_LE) {
3082 tcg_gen_extr32_i64(s1, s2, cmp);
3083 } else {
3084 tcg_gen_extr32_i64(s2, s1, cmp);
3085 }
3086 } else {
3087 TCGv_i128 cmp = tcg_temp_new_i128();
3088 TCGv_i128 val = tcg_temp_new_i128();
3089
3090 if (s->be_data == MO_LE) {
3091 tcg_gen_concat_i64_i128(val, t1, t2);
3092 tcg_gen_concat_i64_i128(cmp, s1, s2);
3093 } else {
3094 tcg_gen_concat_i64_i128(val, t2, t1);
3095 tcg_gen_concat_i64_i128(cmp, s2, s1);
3096 }
3097
3098 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
3099
3100 if (s->be_data == MO_LE) {
3101 tcg_gen_extr_i128_i64(s1, s2, cmp);
3102 } else {
3103 tcg_gen_extr_i128_i64(s2, s1, cmp);
3104 }
3105 }
3106 }
3107
3108 /*
3109 * Compute the ISS.SF bit for syndrome information if an exception
3110 * is taken on a load or store. This indicates whether the instruction
3111 * is accessing a 32-bit or 64-bit register. This logic is derived
3112 * from the ARMv8 specs for LDR (Shared decode for all encodings).
3113 */
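/*
 * For example: "LDRSB w0, [x1]" has sign && ext, so SF == 0 (32-bit
 * destination); "LDRSB x0, [x1]" has sign && !ext, so SF == 1; plain
 * "LDR w0, [x1]" and "LDR x0, [x1]" report SF == 0 and SF == 1
 * respectively, based on size alone.
 */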
ldst_iss_sf(int size,bool sign,bool ext)3114 static bool ldst_iss_sf(int size, bool sign, bool ext)
3115 {
3116
3117 if (sign) {
3118 /*
3119 * Signed loads are 64 bit results if we are not going to
3120 * do a zero-extend from 32 to 64 after the load.
3121 * (For a store, sign and ext are always false.)
3122 */
3123 return !ext;
3124 } else {
3125 /* Unsigned loads/stores work at the specified size */
3126 return size == MO_64;
3127 }
3128 }
3129
trans_STXR(DisasContext * s,arg_stxr * a)3130 static bool trans_STXR(DisasContext *s, arg_stxr *a)
3131 {
3132 if (a->rn == 31) {
3133 gen_check_sp_alignment(s);
3134 }
3135 if (a->lasr) {
3136 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3137 }
3138 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
3139 return true;
3140 }
3141
trans_LDXR(DisasContext * s,arg_stxr * a)3142 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
3143 {
3144 if (a->rn == 31) {
3145 gen_check_sp_alignment(s);
3146 }
3147 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
3148 if (a->lasr) {
3149 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3150 }
3151 return true;
3152 }
3153
trans_STLR(DisasContext * s,arg_stlr * a)3154 static bool trans_STLR(DisasContext *s, arg_stlr *a)
3155 {
3156 TCGv_i64 clean_addr;
3157 MemOp memop;
3158 bool iss_sf = ldst_iss_sf(a->sz, false, false);
3159
3160 /*
3161 * StoreLORelease is the same as Store-Release for QEMU, but
3162 * needs the feature-test.
3163 */
3164 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3165 return false;
3166 }
3167 /* Generate ISS for non-exclusive accesses including LASR. */
3168 if (a->rn == 31) {
3169 gen_check_sp_alignment(s);
3170 }
3171 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3172 memop = check_ordered_align(s, a->rn, 0, true, a->sz);
3173 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3174 true, a->rn != 31, memop);
3175 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
3176 iss_sf, a->lasr);
3177 return true;
3178 }
3179
trans_LDAR(DisasContext * s,arg_stlr * a)3180 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
3181 {
3182 TCGv_i64 clean_addr;
3183 MemOp memop;
3184 bool iss_sf = ldst_iss_sf(a->sz, false, false);
3185
3186 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */
3187 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3188 return false;
3189 }
3190 /* Generate ISS for non-exclusive accesses including LASR. */
3191 if (a->rn == 31) {
3192 gen_check_sp_alignment(s);
3193 }
3194 memop = check_ordered_align(s, a->rn, 0, false, a->sz);
3195 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3196 false, a->rn != 31, memop);
3197 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
3198 a->rt, iss_sf, a->lasr);
3199 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3200 return true;
3201 }
3202
trans_STXP(DisasContext * s,arg_stxr * a)3203 static bool trans_STXP(DisasContext *s, arg_stxr *a)
3204 {
3205 if (a->rn == 31) {
3206 gen_check_sp_alignment(s);
3207 }
3208 if (a->lasr) {
3209 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3210 }
3211 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
3212 return true;
3213 }
3214
trans_LDXP(DisasContext * s,arg_stxr * a)3215 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
3216 {
3217 if (a->rn == 31) {
3218 gen_check_sp_alignment(s);
3219 }
3220 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
3221 if (a->lasr) {
3222 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3223 }
3224 return true;
3225 }
3226
trans_CASP(DisasContext * s,arg_CASP * a)3227 static bool trans_CASP(DisasContext *s, arg_CASP *a)
3228 {
3229 if (!dc_isar_feature(aa64_atomics, s)) {
3230 return false;
3231 }
3232 if (((a->rt | a->rs) & 1) != 0) {
3233 return false;
3234 }
3235
3236 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
3237 return true;
3238 }
3239
trans_CAS(DisasContext * s,arg_CAS * a)3240 static bool trans_CAS(DisasContext *s, arg_CAS *a)
3241 {
3242 if (!dc_isar_feature(aa64_atomics, s)) {
3243 return false;
3244 }
3245 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
3246 return true;
3247 }
3248
trans_LD_lit(DisasContext * s,arg_ldlit * a)3249 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
3250 {
3251 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
3252 TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
3253 TCGv_i64 clean_addr = tcg_temp_new_i64();
3254 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3255
3256 gen_pc_plus_diff(s, clean_addr, a->imm);
3257 do_gpr_ld(s, tcg_rt, clean_addr, memop,
3258 false, true, a->rt, iss_sf, false);
3259 return true;
3260 }
3261
trans_LD_lit_v(DisasContext * s,arg_ldlit * a)3262 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
3263 {
3264 /* Load register (literal), vector version */
3265 TCGv_i64 clean_addr;
3266 MemOp memop;
3267
3268 if (!fp_access_check(s)) {
3269 return true;
3270 }
3271 memop = finalize_memop_asimd(s, a->sz);
3272 clean_addr = tcg_temp_new_i64();
3273 gen_pc_plus_diff(s, clean_addr, a->imm);
3274 do_fp_ld(s, a->rt, clean_addr, memop);
3275 return true;
3276 }
3277
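/*
 * Common address setup and writeback for the load/store pair forms:
 * a->p selects post-index (the immediate is applied only at writeback)
 * versus pre-index or signed-offset (applied before the access), and
 * a->w selects whether the updated address is written back to Xn/SP.
 */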
op_addr_ldstpair_pre(DisasContext * s,arg_ldstpair * a,TCGv_i64 * clean_addr,TCGv_i64 * dirty_addr,uint64_t offset,bool is_store,MemOp mop)3278 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
3279 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3280 uint64_t offset, bool is_store, MemOp mop)
3281 {
3282 if (a->rn == 31) {
3283 gen_check_sp_alignment(s);
3284 }
3285
3286 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3287 if (!a->p) {
3288 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3289 }
3290
3291 *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
3292 (a->w || a->rn != 31), 2 << a->sz, mop);
3293 }
3294
op_addr_ldstpair_post(DisasContext * s,arg_ldstpair * a,TCGv_i64 dirty_addr,uint64_t offset)3295 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
3296 TCGv_i64 dirty_addr, uint64_t offset)
3297 {
3298 if (a->w) {
3299 if (a->p) {
3300 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3301 }
3302 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3303 }
3304 }
3305
trans_STP(DisasContext * s,arg_ldstpair * a)3306 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
3307 {
3308 uint64_t offset = a->imm << a->sz;
3309 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3310 MemOp mop = finalize_memop(s, a->sz);
3311
3312 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3313 tcg_rt = cpu_reg(s, a->rt);
3314 tcg_rt2 = cpu_reg(s, a->rt2);
3315 /*
3316 * We built mop above for the single logical access -- rebuild it
3317 * now for the paired operation.
3318 *
3319 * With LSE2, non-sign-extending pairs are treated atomically if
3320 * aligned, and if unaligned one of the pair will be completely
3321 * within a 16-byte block and that element will be atomic.
3322 * Otherwise each element is separately atomic.
3323 * In all cases, issue one operation with the correct atomicity.
3324 */
3325 mop = a->sz + 1;
3326 if (s->align_mem) {
3327 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3328 }
3329 mop = finalize_memop_pair(s, mop);
3330 if (a->sz == 2) {
3331 TCGv_i64 tmp = tcg_temp_new_i64();
3332
3333 if (s->be_data == MO_LE) {
3334 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3335 } else {
3336 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3337 }
3338 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3339 } else {
3340 TCGv_i128 tmp = tcg_temp_new_i128();
3341
3342 if (s->be_data == MO_LE) {
3343 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3344 } else {
3345 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3346 }
3347 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3348 }
3349 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3350 return true;
3351 }
3352
trans_LDP(DisasContext * s,arg_ldstpair * a)3353 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3354 {
3355 uint64_t offset = a->imm << a->sz;
3356 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3357 MemOp mop = finalize_memop(s, a->sz);
3358
3359 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3360 tcg_rt = cpu_reg(s, a->rt);
3361 tcg_rt2 = cpu_reg(s, a->rt2);
3362
3363 /*
3364 * We built mop above for the single logical access -- rebuild it
3365 * now for the paired operation.
3366 *
3367 * With LSE2, non-sign-extending pairs are treated atomically if
3368 * aligned, and if unaligned one of the pair will be completely
3369 * within a 16-byte block and that element will be atomic.
3370 * Otherwise each element is separately atomic.
3371 * In all cases, issue one operation with the correct atomicity.
3372 *
3373 * This treats sign-extending loads like zero-extending loads,
3374 * since that reuses the most code below.
3375 */
3376 mop = a->sz + 1;
3377 if (s->align_mem) {
3378 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3379 }
3380 mop = finalize_memop_pair(s, mop);
3381 if (a->sz == 2) {
3382 int o2 = s->be_data == MO_LE ? 32 : 0;
3383 int o1 = o2 ^ 32;
3384
3385 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3386 if (a->sign) {
3387 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3388 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3389 } else {
3390 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3391 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3392 }
3393 } else {
3394 TCGv_i128 tmp = tcg_temp_new_i128();
3395
3396 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3397 if (s->be_data == MO_LE) {
3398 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3399 } else {
3400 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3401 }
3402 }
3403 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3404 return true;
3405 }
3406
trans_STP_v(DisasContext * s,arg_ldstpair * a)3407 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3408 {
3409 uint64_t offset = a->imm << a->sz;
3410 TCGv_i64 clean_addr, dirty_addr;
3411 MemOp mop;
3412
3413 if (!fp_access_check(s)) {
3414 return true;
3415 }
3416
3417 /* LSE2 does not merge FP pairs; leave these as separate operations. */
3418 mop = finalize_memop_asimd(s, a->sz);
3419 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3420 do_fp_st(s, a->rt, clean_addr, mop);
3421 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3422 do_fp_st(s, a->rt2, clean_addr, mop);
3423 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3424 return true;
3425 }
3426
trans_LDP_v(DisasContext * s,arg_ldstpair * a)3427 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3428 {
3429 uint64_t offset = a->imm << a->sz;
3430 TCGv_i64 clean_addr, dirty_addr;
3431 MemOp mop;
3432
3433 if (!fp_access_check(s)) {
3434 return true;
3435 }
3436
3437 /* LSE2 does not merge FP pairs; leave these as separate operations. */
3438 mop = finalize_memop_asimd(s, a->sz);
3439 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3440 do_fp_ld(s, a->rt, clean_addr, mop);
3441 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3442 do_fp_ld(s, a->rt2, clean_addr, mop);
3443 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3444 return true;
3445 }
3446
3447 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3448 {
3449 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3450 uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3451 MemOp mop;
3452 TCGv_i128 tmp;
3453
3454 /* STGP only comes in one size. */
3455 tcg_debug_assert(a->sz == MO_64);
3456
3457 if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3458 return false;
3459 }
3460
3461 if (a->rn == 31) {
3462 gen_check_sp_alignment(s);
3463 }
3464
3465 dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3466 if (!a->p) {
3467 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3468 }
3469
3470 clean_addr = clean_data_tbi(s, dirty_addr);
3471 tcg_rt = cpu_reg(s, a->rt);
3472 tcg_rt2 = cpu_reg(s, a->rt2);
3473
3474 /*
3475 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3476 * and one tag operation. We implement it as one single aligned 16-byte
3477 * memory operation for convenience. Note that the alignment ensures
3478 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3479 */
3480 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3481
3482 tmp = tcg_temp_new_i128();
3483 if (s->be_data == MO_LE) {
3484 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3485 } else {
3486 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3487 }
3488 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3489
3490 /* Perform the tag store, if tag access enabled. */
3491 if (s->ata[0]) {
3492 if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3493 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3494 } else {
3495 gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3496 }
3497 }
3498
3499 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3500 return true;
3501 }
3502
3503 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3504 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3505 uint64_t offset, bool is_store, MemOp mop)
3506 {
3507 int memidx;
3508
3509 if (a->rn == 31) {
3510 gen_check_sp_alignment(s);
3511 }
3512
3513 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3514 if (!a->p) {
3515 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3516 }
3517 memidx = get_a64_user_mem_index(s, a->unpriv);
3518 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3519 a->w || a->rn != 31,
3520 mop, a->unpriv, memidx);
3521 }
3522
3523 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3524 TCGv_i64 dirty_addr, uint64_t offset)
3525 {
3526 if (a->w) {
3527 if (a->p) {
3528 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3529 }
3530 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3531 }
3532 }
3533
3534 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3535 {
3536 bool iss_sf, iss_valid = !a->w;
3537 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3538 int memidx = get_a64_user_mem_index(s, a->unpriv);
3539 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3540
3541 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3542
3543 tcg_rt = cpu_reg(s, a->rt);
3544 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3545
3546 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3547 iss_valid, a->rt, iss_sf, false);
3548 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3549 return true;
3550 }
3551
3552 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3553 {
3554 bool iss_sf, iss_valid = !a->w;
3555 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3556 int memidx = get_a64_user_mem_index(s, a->unpriv);
3557 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3558
3559 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3560
3561 tcg_rt = cpu_reg(s, a->rt);
3562 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3563
3564 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3565 a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3566 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3567 return true;
3568 }
3569
3570 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3571 {
3572 TCGv_i64 clean_addr, dirty_addr;
3573 MemOp mop;
3574
3575 if (!fp_access_check(s)) {
3576 return true;
3577 }
3578 mop = finalize_memop_asimd(s, a->sz);
3579 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3580 do_fp_st(s, a->rt, clean_addr, mop);
3581 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3582 return true;
3583 }
3584
3585 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3586 {
3587 TCGv_i64 clean_addr, dirty_addr;
3588 MemOp mop;
3589
3590 if (!fp_access_check(s)) {
3591 return true;
3592 }
3593 mop = finalize_memop_asimd(s, a->sz);
3594 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3595 do_fp_ld(s, a->rt, clean_addr, mop);
3596 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3597 return true;
3598 }
3599
3600 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3601 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3602 bool is_store, MemOp memop)
3603 {
3604 TCGv_i64 tcg_rm;
3605
3606 if (a->rn == 31) {
3607 gen_check_sp_alignment(s);
3608 }
3609 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3610
3611 tcg_rm = read_cpu_reg(s, a->rm, 1);
3612 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3613
3614 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3615 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3616 }
3617
3618 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3619 {
3620 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3621 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3622 MemOp memop;
3623
3624 if (extract32(a->opt, 1, 1) == 0) {
3625 return false;
3626 }
3627
3628 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3629 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3630 tcg_rt = cpu_reg(s, a->rt);
3631 do_gpr_ld(s, tcg_rt, clean_addr, memop,
3632 a->ext, true, a->rt, iss_sf, false);
3633 return true;
3634 }
3635
3636 static bool trans_STR(DisasContext *s, arg_ldst *a)
3637 {
3638 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3639 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3640 MemOp memop;
3641
3642 if (extract32(a->opt, 1, 1) == 0) {
3643 return false;
3644 }
3645
3646 memop = finalize_memop(s, a->sz);
3647 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3648 tcg_rt = cpu_reg(s, a->rt);
3649 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3650 return true;
3651 }
3652
3653 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3654 {
3655 TCGv_i64 clean_addr, dirty_addr;
3656 MemOp memop;
3657
3658 if (extract32(a->opt, 1, 1) == 0) {
3659 return false;
3660 }
3661
3662 if (!fp_access_check(s)) {
3663 return true;
3664 }
3665
3666 memop = finalize_memop_asimd(s, a->sz);
3667 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3668 do_fp_ld(s, a->rt, clean_addr, memop);
3669 return true;
3670 }
3671
3672 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3673 {
3674 TCGv_i64 clean_addr, dirty_addr;
3675 MemOp memop;
3676
3677 if (extract32(a->opt, 1, 1) == 0) {
3678 return false;
3679 }
3680
3681 if (!fp_access_check(s)) {
3682 return true;
3683 }
3684
3685 memop = finalize_memop_asimd(s, a->sz);
3686 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3687 do_fp_st(s, a->rt, clean_addr, memop);
3688 return true;
3689 }
3690
3691
3692 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3693 int sign, bool invert)
3694 {
3695 MemOp mop = a->sz | sign;
3696 TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3697
3698 if (a->rn == 31) {
3699 gen_check_sp_alignment(s);
3700 }
3701 mop = check_atomic_align(s, a->rn, mop);
3702 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3703 a->rn != 31, mop);
3704 tcg_rs = read_cpu_reg(s, a->rs, true);
3705 tcg_rt = cpu_reg(s, a->rt);
3706 if (invert) {
3707 tcg_gen_not_i64(tcg_rs, tcg_rs);
3708 }
3709 /*
3710 * The tcg atomic primitives are all full barriers. Therefore we
3711 * can ignore the Acquire and Release bits of this instruction.
3712 */
3713 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3714
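    /*
     * Note: for the signed min/max ops, MO_SIGN is passed so that the
     * comparison is done as a signed value at the operand width; it also
     * leaves the fetched old value sign-extended in Rt, whereas the
     * architectural register result is zero-extended to the access size,
     * which is what the switch below restores.
     */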
3715 if (mop & MO_SIGN) {
3716 switch (a->sz) {
3717 case MO_8:
3718 tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3719 break;
3720 case MO_16:
3721 tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3722 break;
3723 case MO_32:
3724 tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3725 break;
3726 case MO_64:
3727 break;
3728 default:
3729 g_assert_not_reached();
3730 }
3731 }
3732 return true;
3733 }
3734
3735 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3736 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3737 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3738 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3739 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3740 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3741 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3742 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3743 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3744
3745 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3746 {
3747 bool iss_sf = ldst_iss_sf(a->sz, false, false);
3748 TCGv_i64 clean_addr;
3749 MemOp mop;
3750
3751 if (!dc_isar_feature(aa64_atomics, s) ||
3752 !dc_isar_feature(aa64_rcpc_8_3, s)) {
3753 return false;
3754 }
3755 if (a->rn == 31) {
3756 gen_check_sp_alignment(s);
3757 }
3758 mop = check_ordered_align(s, a->rn, 0, false, a->sz);
3759 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3760 a->rn != 31, mop);
3761 /*
3762 * LDAPR* are a special case because they are a simple load, not a
3763 * fetch-and-do-something op.
3764 * The architectural consistency requirements here are weaker than
3765 * full load-acquire (we only need "load-acquire processor consistent"),
3766 * but we choose to implement them as full LDAQ.
3767 */
3768 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3769 true, a->rt, iss_sf, true);
3770 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3771 return true;
3772 }
3773
3774 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3775 {
3776 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3777 MemOp memop;
3778
3779 /* Load with pointer authentication */
3780 if (!dc_isar_feature(aa64_pauth, s)) {
3781 return false;
3782 }
3783
3784 if (a->rn == 31) {
3785 gen_check_sp_alignment(s);
3786 }
3787 dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3788
3789 if (s->pauth_active) {
3790 if (!a->m) {
3791 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3792 tcg_constant_i64(0));
3793 } else {
3794 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3795 tcg_constant_i64(0));
3796 }
3797 }
3798
3799 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3800
3801 memop = finalize_memop(s, MO_64);
3802
3803 /* Note that "clean" and "dirty" here refer to TBI not PAC. */
3804 clean_addr = gen_mte_check1(s, dirty_addr, false,
3805 a->w || a->rn != 31, memop);
3806
3807 tcg_rt = cpu_reg(s, a->rt);
3808 do_gpr_ld(s, tcg_rt, clean_addr, memop,
3809 /* extend */ false, /* iss_valid */ !a->w,
3810 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3811
3812 if (a->w) {
3813 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3814 }
3815 return true;
3816 }
3817
3818 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3819 {
3820 TCGv_i64 clean_addr, dirty_addr;
3821 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3822 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3823
3824 if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3825 return false;
3826 }
3827
3828 if (a->rn == 31) {
3829 gen_check_sp_alignment(s);
3830 }
3831
3832 mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3833 dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3834 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3835 clean_addr = clean_data_tbi(s, dirty_addr);
3836
3837 /*
3838 * Load-AcquirePC semantics; we implement as the slightly more
3839 * restrictive Load-Acquire.
3840 */
3841 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3842 a->rt, iss_sf, true);
3843 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3844 return true;
3845 }
3846
3847 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3848 {
3849 TCGv_i64 clean_addr, dirty_addr;
3850 MemOp mop = a->sz;
3851 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3852
3853 if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3854 return false;
3855 }
3856
3857 /* TODO: ARMv8.4-LSE SCTLR.nAA */
3858
3859 if (a->rn == 31) {
3860 gen_check_sp_alignment(s);
3861 }
3862
3863 mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3864 dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3865 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3866 clean_addr = clean_data_tbi(s, dirty_addr);
3867
3868 /* Store-Release semantics */
3869 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3870 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3871 return true;
3872 }
3873
3874 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3875 {
3876 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3877 MemOp endian, align, mop;
3878
3879 int total; /* total bytes */
3880 int elements; /* elements per vector */
3881 int r;
3882 int size = a->sz;
3883
3884 if (!a->p && a->rm != 0) {
3885 /* For non-postindexed accesses the Rm field must be 0 */
3886 return false;
3887 }
3888 if (size == 3 && !a->q && a->selem != 1) {
3889 return false;
3890 }
3891 if (!fp_access_check(s)) {
3892 return true;
3893 }
3894
3895 if (a->rn == 31) {
3896 gen_check_sp_alignment(s);
3897 }
3898
3899 /* For our purposes, bytes are always little-endian. */
3900 endian = s->be_data;
3901 if (size == 0) {
3902 endian = MO_LE;
3903 }
3904
3905 total = a->rpt * a->selem * (a->q ? 16 : 8);
3906 tcg_rn = cpu_reg_sp(s, a->rn);
3907
3908 /*
3909 * Issue the MTE check vs the logical repeat count, before we
3910 * promote consecutive little-endian elements below.
3911 */
3912 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3913 finalize_memop_asimd(s, size));
3914
3915 /*
3916 * Consecutive little-endian elements from a single register
3917 * can be promoted to a larger little-endian operation.
3918 */
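    /*
     * For example, LD1 {Vt.16B}, [Xn] (selem == 1, size == 0) on a
     * little-endian guest is promoted below to size == 3, i.e. two
     * 8-byte loads rather than sixteen single-byte loads.
     */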
3919 align = MO_ALIGN;
3920 if (a->selem == 1 && endian == MO_LE) {
3921 align = pow2_align(size);
3922 size = 3;
3923 }
3924 if (!s->align_mem) {
3925 align = 0;
3926 }
3927 mop = endian | size | align;
3928
3929 elements = (a->q ? 16 : 8) >> size;
3930 tcg_ebytes = tcg_constant_i64(1 << size);
3931 for (r = 0; r < a->rpt; r++) {
3932 int e;
3933 for (e = 0; e < elements; e++) {
3934 int xs;
3935 for (xs = 0; xs < a->selem; xs++) {
3936 int tt = (a->rt + r + xs) % 32;
3937 do_vec_ld(s, tt, e, clean_addr, mop);
3938 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3939 }
3940 }
3941 }
3942
3943 /*
3944 * For non-quad operations, setting a slice of the low 64 bits of
3945 * the register clears the high 64 bits (in the ARM ARM pseudocode
3946 * this is implicit in the fact that 'rval' is a 64 bit wide
3947 * variable). For quad operations, we might still need to zero
3948 * the high bits of SVE.
3949 */
3950 for (r = 0; r < a->rpt * a->selem; r++) {
3951 int tt = (a->rt + r) % 32;
3952 clear_vec_high(s, a->q, tt);
3953 }
3954
3955 if (a->p) {
3956 if (a->rm == 31) {
3957 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3958 } else {
3959 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3960 }
3961 }
3962 return true;
3963 }
3964
3965 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3966 {
3967 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3968 MemOp endian, align, mop;
3969
3970 int total; /* total bytes */
3971 int elements; /* elements per vector */
3972 int r;
3973 int size = a->sz;
3974
3975 if (!a->p && a->rm != 0) {
3976 /* For non-postindexed accesses the Rm field must be 0 */
3977 return false;
3978 }
3979 if (size == 3 && !a->q && a->selem != 1) {
3980 return false;
3981 }
3982 if (!fp_access_check(s)) {
3983 return true;
3984 }
3985
3986 if (a->rn == 31) {
3987 gen_check_sp_alignment(s);
3988 }
3989
3990 /* For our purposes, bytes are always little-endian. */
3991 endian = s->be_data;
3992 if (size == 0) {
3993 endian = MO_LE;
3994 }
3995
3996 total = a->rpt * a->selem * (a->q ? 16 : 8);
3997 tcg_rn = cpu_reg_sp(s, a->rn);
3998
3999 /*
4000 * Issue the MTE check vs the logical repeat count, before we
4001 * promote consecutive little-endian elements below.
4002 */
4003 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
4004 finalize_memop_asimd(s, size));
4005
4006 /*
4007 * Consecutive little-endian elements from a single register
4008 * can be promoted to a larger little-endian operation.
4009 */
4010 align = MO_ALIGN;
4011 if (a->selem == 1 && endian == MO_LE) {
4012 align = pow2_align(size);
4013 size = 3;
4014 }
4015 if (!s->align_mem) {
4016 align = 0;
4017 }
4018 mop = endian | size | align;
4019
4020 elements = (a->q ? 16 : 8) >> size;
4021 tcg_ebytes = tcg_constant_i64(1 << size);
4022 for (r = 0; r < a->rpt; r++) {
4023 int e;
4024 for (e = 0; e < elements; e++) {
4025 int xs;
4026 for (xs = 0; xs < a->selem; xs++) {
4027 int tt = (a->rt + r + xs) % 32;
4028 do_vec_st(s, tt, e, clean_addr, mop);
4029 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4030 }
4031 }
4032 }
4033
4034 if (a->p) {
4035 if (a->rm == 31) {
4036 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4037 } else {
4038 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4039 }
4040 }
4041 return true;
4042 }
4043
4044 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
4045 {
4046 int xs, total, rt;
4047 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4048 MemOp mop;
4049
4050 if (!a->p && a->rm != 0) {
4051 return false;
4052 }
4053 if (!fp_access_check(s)) {
4054 return true;
4055 }
4056
4057 if (a->rn == 31) {
4058 gen_check_sp_alignment(s);
4059 }
4060
4061 total = a->selem << a->scale;
4062 tcg_rn = cpu_reg_sp(s, a->rn);
4063
4064 mop = finalize_memop_asimd(s, a->scale);
4065 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
4066 total, mop);
4067
4068 tcg_ebytes = tcg_constant_i64(1 << a->scale);
4069 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4070 do_vec_st(s, rt, a->index, clean_addr, mop);
4071 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4072 }
4073
4074 if (a->p) {
4075 if (a->rm == 31) {
4076 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4077 } else {
4078 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4079 }
4080 }
4081 return true;
4082 }
4083
4084 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
4085 {
4086 int xs, total, rt;
4087 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4088 MemOp mop;
4089
4090 if (!a->p && a->rm != 0) {
4091 return false;
4092 }
4093 if (!fp_access_check(s)) {
4094 return true;
4095 }
4096
4097 if (a->rn == 31) {
4098 gen_check_sp_alignment(s);
4099 }
4100
4101 total = a->selem << a->scale;
4102 tcg_rn = cpu_reg_sp(s, a->rn);
4103
4104 mop = finalize_memop_asimd(s, a->scale);
4105 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
4106 total, mop);
4107
4108 tcg_ebytes = tcg_constant_i64(1 << a->scale);
4109 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4110 do_vec_ld(s, rt, a->index, clean_addr, mop);
4111 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4112 }
4113
4114 if (a->p) {
4115 if (a->rm == 31) {
4116 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4117 } else {
4118 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4119 }
4120 }
4121 return true;
4122 }
4123
4124 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
4125 {
4126 int xs, total, rt;
4127 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4128 MemOp mop;
4129
4130 if (!a->p && a->rm != 0) {
4131 return false;
4132 }
4133 if (!fp_access_check(s)) {
4134 return true;
4135 }
4136
4137 if (a->rn == 31) {
4138 gen_check_sp_alignment(s);
4139 }
4140
4141 total = a->selem << a->scale;
4142 tcg_rn = cpu_reg_sp(s, a->rn);
4143
4144 mop = finalize_memop_asimd(s, a->scale);
4145 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
4146 total, mop);
4147
4148 tcg_ebytes = tcg_constant_i64(1 << a->scale);
4149 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4150 /* Load and replicate to all elements */
4151 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4152
4153 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
4154 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
4155 (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
4156 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4157 }
4158
4159 if (a->p) {
4160 if (a->rm == 31) {
4161 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4162 } else {
4163 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4164 }
4165 }
4166 return true;
4167 }
4168
4169 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
4170 {
4171 TCGv_i64 addr, clean_addr, tcg_rt;
4172 int size = 4 << s->dcz_blocksize;
4173
4174 if (!dc_isar_feature(aa64_mte, s)) {
4175 return false;
4176 }
4177 if (s->current_el == 0) {
4178 return false;
4179 }
4180
4181 if (a->rn == 31) {
4182 gen_check_sp_alignment(s);
4183 }
4184
4185 addr = read_cpu_reg_sp(s, a->rn, true);
4186 tcg_gen_addi_i64(addr, addr, a->imm);
4187 tcg_rt = cpu_reg(s, a->rt);
4188
4189 if (s->ata[0]) {
4190 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
4191 }
4192 /*
4193 * The non-tags portion of STZGM is mostly like DC_ZVA,
4194 * except the alignment happens before the access.
4195 */
4196 clean_addr = clean_data_tbi(s, addr);
4197 tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4198 gen_helper_dc_zva(tcg_env, clean_addr);
4199 return true;
4200 }
4201
4202 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
4203 {
4204 TCGv_i64 addr, clean_addr, tcg_rt;
4205
4206 if (!dc_isar_feature(aa64_mte, s)) {
4207 return false;
4208 }
4209 if (s->current_el == 0) {
4210 return false;
4211 }
4212
4213 if (a->rn == 31) {
4214 gen_check_sp_alignment(s);
4215 }
4216
4217 addr = read_cpu_reg_sp(s, a->rn, true);
4218 tcg_gen_addi_i64(addr, addr, a->imm);
4219 tcg_rt = cpu_reg(s, a->rt);
4220
4221 if (s->ata[0]) {
4222 gen_helper_stgm(tcg_env, addr, tcg_rt);
4223 } else {
4224 MMUAccessType acc = MMU_DATA_STORE;
4225 int size = 4 << s->gm_blocksize;
4226
4227 clean_addr = clean_data_tbi(s, addr);
4228 tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4229 gen_probe_access(s, clean_addr, acc, size);
4230 }
4231 return true;
4232 }
4233
4234 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
4235 {
4236 TCGv_i64 addr, clean_addr, tcg_rt;
4237
4238 if (!dc_isar_feature(aa64_mte, s)) {
4239 return false;
4240 }
4241 if (s->current_el == 0) {
4242 return false;
4243 }
4244
4245 if (a->rn == 31) {
4246 gen_check_sp_alignment(s);
4247 }
4248
4249 addr = read_cpu_reg_sp(s, a->rn, true);
4250 tcg_gen_addi_i64(addr, addr, a->imm);
4251 tcg_rt = cpu_reg(s, a->rt);
4252
4253 if (s->ata[0]) {
4254 gen_helper_ldgm(tcg_rt, tcg_env, addr);
4255 } else {
4256 MMUAccessType acc = MMU_DATA_LOAD;
4257 int size = 4 << s->gm_blocksize;
4258
4259 clean_addr = clean_data_tbi(s, addr);
4260 tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4261 gen_probe_access(s, clean_addr, acc, size);
4262 /* The result tags are zeros. */
4263 tcg_gen_movi_i64(tcg_rt, 0);
4264 }
4265 return true;
4266 }
4267
4268 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
4269 {
4270 TCGv_i64 addr, clean_addr, tcg_rt;
4271
4272 if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
4273 return false;
4274 }
4275
4276 if (a->rn == 31) {
4277 gen_check_sp_alignment(s);
4278 }
4279
4280 addr = read_cpu_reg_sp(s, a->rn, true);
4281 if (!a->p) {
4282 /* pre-index or signed offset */
4283 tcg_gen_addi_i64(addr, addr, a->imm);
4284 }
4285
4286 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4287 tcg_rt = cpu_reg(s, a->rt);
4288 if (s->ata[0]) {
4289 gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
4290 } else {
4291 /*
4292 * Tag access disabled: we must check for aborts on the load
4293          * from [rn+offset], and then insert a 0 tag into rt.
4294 */
4295 clean_addr = clean_data_tbi(s, addr);
4296 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4297 gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
4298 }
4299
4300 if (a->w) {
4301 /* pre-index or post-index */
4302 if (a->p) {
4303 /* post-index */
4304 tcg_gen_addi_i64(addr, addr, a->imm);
4305 }
4306 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4307 }
4308 return true;
4309 }
4310
4311 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
4312 {
4313 TCGv_i64 addr, tcg_rt;
4314
4315 if (a->rn == 31) {
4316 gen_check_sp_alignment(s);
4317 }
4318
4319 addr = read_cpu_reg_sp(s, a->rn, true);
4320 if (!a->p) {
4321 /* pre-index or signed offset */
4322 tcg_gen_addi_i64(addr, addr, a->imm);
4323 }
4324 tcg_rt = cpu_reg_sp(s, a->rt);
4325 if (!s->ata[0]) {
4326 /*
4327 * For STG and ST2G, we need to check alignment and probe memory.
4328 * TODO: For STZG and STZ2G, we could rely on the stores below,
4329 * at least for system mode; user-only won't enforce alignment.
4330 */
4331 if (is_pair) {
4332 gen_helper_st2g_stub(tcg_env, addr);
4333 } else {
4334 gen_helper_stg_stub(tcg_env, addr);
4335 }
4336 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4337 if (is_pair) {
4338 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4339 } else {
4340 gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4341 }
4342 } else {
4343 if (is_pair) {
4344 gen_helper_st2g(tcg_env, addr, tcg_rt);
4345 } else {
4346 gen_helper_stg(tcg_env, addr, tcg_rt);
4347 }
4348 }
4349
4350 if (is_zero) {
4351 TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4352 TCGv_i64 zero64 = tcg_constant_i64(0);
4353 TCGv_i128 zero128 = tcg_temp_new_i128();
4354 int mem_index = get_mem_index(s);
4355 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4356
4357 tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4358
4359 /* This is 1 or 2 atomic 16-byte operations. */
4360 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4361 if (is_pair) {
4362 tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4363 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4364 }
4365 }
4366
4367 if (a->w) {
4368 /* pre-index or post-index */
4369 if (a->p) {
4370 /* post-index */
4371 tcg_gen_addi_i64(addr, addr, a->imm);
4372 }
4373 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4374 }
4375 return true;
4376 }
4377
4378 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
4379 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
4380 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
4381 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
4382
4383 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4384
4385 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4386 bool is_setg, SetFn fn)
4387 {
4388 int memidx;
4389 uint32_t syndrome, desc = 0;
4390
4391 if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4392 return false;
4393 }
4394
4395 /*
4396 * UNPREDICTABLE cases: we choose to UNDEF, which allows
4397 * us to pull this check before the CheckMOPSEnabled() test
4398 * (which we do in the helper function)
4399 */
4400 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4401 a->rd == 31 || a->rn == 31) {
4402 return false;
4403 }
4404
4405 memidx = get_a64_user_mem_index(s, a->unpriv);
4406
4407 /*
4408 * We pass option_a == true, matching our implementation;
4409 * we pass wrong_option == false: helper function may set that bit.
4410 */
4411 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4412 is_epilogue, false, true, a->rd, a->rs, a->rn);
4413
4414 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4415 /* We may need to do MTE tag checking, so assemble the descriptor */
4416 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4417 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4418 desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4419 /* SIZEM1 and ALIGN we leave 0 (byte write) */
4420 }
4421 /* The helper function always needs the memidx even with MTE disabled */
4422 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4423
4424 /*
4425 * The helper needs the register numbers, but since they're in
4426 * the syndrome anyway, we let it extract them from there rather
4427 * than passing in an extra three integer arguments.
4428 */
4429 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4430 return true;
4431 }
4432
4433 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
4434 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
4435 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
4436 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
4437 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
4438 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
4439
4440 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4441
4442 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4443 {
4444 int rmemidx, wmemidx;
4445 uint32_t syndrome, rdesc = 0, wdesc = 0;
4446 bool wunpriv = extract32(a->options, 0, 1);
4447 bool runpriv = extract32(a->options, 1, 1);
4448
4449 /*
4450 * UNPREDICTABLE cases: we choose to UNDEF, which allows
4451 * us to pull this check before the CheckMOPSEnabled() test
4452 * (which we do in the helper function)
4453 */
4454 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4455 a->rd == 31 || a->rs == 31 || a->rn == 31) {
4456 return false;
4457 }
4458
4459 rmemidx = get_a64_user_mem_index(s, runpriv);
4460 wmemidx = get_a64_user_mem_index(s, wunpriv);
4461
4462 /*
4463 * We pass option_a == true, matching our implementation;
4464 * we pass wrong_option == false: helper function may set that bit.
4465 */
4466 syndrome = syn_mop(false, false, a->options, is_epilogue,
4467 false, true, a->rd, a->rs, a->rn);
4468
4469 /* If we need to do MTE tag checking, assemble the descriptors */
4470 if (s->mte_active[runpriv]) {
4471 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4472 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4473 }
4474 if (s->mte_active[wunpriv]) {
4475 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4476 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4477 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4478 }
4479 /* The helper function needs these parts of the descriptor regardless */
4480 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4481 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4482
4483 /*
4484 * The helper needs the register numbers, but since they're in
4485 * the syndrome anyway, we let it extract them from there rather
4486 * than passing in an extra three integer arguments.
4487 */
4488 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4489 tcg_constant_i32(rdesc));
4490 return true;
4491 }
4492
4493 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4494 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4495 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4496 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4497 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4498 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
4499
4500 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4501
4502 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4503 bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4504 {
4505 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4506 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4507 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4508
4509 fn(tcg_rd, tcg_rn, tcg_imm);
4510 if (!a->sf) {
4511 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4512 }
4513 return true;
4514 }
4515
4516 /*
4517 * PC-rel. addressing
4518 */
4519
4520 static bool trans_ADR(DisasContext *s, arg_ri *a)
4521 {
4522 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4523 return true;
4524 }
4525
4526 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4527 {
4528 int64_t offset = (int64_t)a->imm << 12;
4529
4530 /* The page offset is ok for CF_PCREL. */
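    /*
     * E.g. with pc_curr == 0x10000234 and a->imm == 1: the target page
     * base is 0x10001000, so after subtracting the in-page offset 0x234
     * the diff passed to gen_pc_plus_diff is 0xdcc.
     */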
4531 offset -= s->pc_curr & 0xfff;
4532 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4533 return true;
4534 }
4535
4536 /*
4537 * Add/subtract (immediate)
4538 */
4539 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4540 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4541 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4542 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4543
4544 /*
4545 * Add/subtract (immediate, with tags)
4546 */
4547
4548 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4549 bool sub_op)
4550 {
4551 TCGv_i64 tcg_rn, tcg_rd;
4552 int imm;
4553
4554 imm = a->uimm6 << LOG2_TAG_GRANULE;
4555 if (sub_op) {
4556 imm = -imm;
4557 }
4558
4559 tcg_rn = cpu_reg_sp(s, a->rn);
4560 tcg_rd = cpu_reg_sp(s, a->rd);
4561
4562 if (s->ata[0]) {
4563 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4564 tcg_constant_i32(imm),
4565 tcg_constant_i32(a->uimm4));
4566 } else {
4567 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4568 gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4569 }
4570 return true;
4571 }
4572
4573 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4574 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4575
4576 /* The input should be a value in the bottom e bits (with higher
4577 * bits zero); returns that value replicated into every element
4578 * of size e in a 64 bit integer.
4579 */
4580 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4581 {
4582 assert(e != 0);
4583 while (e < 64) {
4584 mask |= mask << e;
4585 e *= 2;
4586 }
4587 return mask;
4588 }
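/* For instance, bitfield_replicate(0x3, 4) returns 0x3333333333333333. */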
4589
4590 /*
4591 * Logical (immediate)
4592 */
4593
4594 /*
4595 * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4596 * only require the wmask. Returns false if the imms/immr/immn are a reserved
4597 * value (ie should cause a guest UNDEF exception), and true if they are
4598 * valid, in which case the decoded bit pattern is written to result.
4599 */
4600 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4601 unsigned int imms, unsigned int immr)
4602 {
4603 uint64_t mask;
4604 unsigned e, levels, s, r;
4605 int len;
4606
4607 assert(immn < 2 && imms < 64 && immr < 64);
4608
4609 /* The bit patterns we create here are 64 bit patterns which
4610 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4611 * 64 bits each. Each element contains the same value: a run
4612 * of between 1 and e-1 non-zero bits, rotated within the
4613 * element by between 0 and e-1 bits.
4614 *
4615 * The element size and run length are encoded into immn (1 bit)
4616 * and imms (6 bits) as follows:
4617 * 64 bit elements: immn = 1, imms = <length of run - 1>
4618 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4619 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4620 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4621 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4622 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4623 * Notice that immn = 0, imms = 11111x is the only combination
4624 * not covered by one of the above options; this is reserved.
4625 * Further, <length of run - 1> all-ones is a reserved pattern.
4626 *
4627 * In all cases the rotation is by immr % e (and immr is 6 bits).
4628 */
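    /*
     * Worked example: immn = 0, imms = 0b000111, immr = 0b000100 selects
     * 32-bit elements (len = 5, e = 32) containing a run of s + 1 = 8
     * ones rotated right by r = 4, i.e. the element 0xf000000f, giving
     * the final mask 0xf000000ff000000f.
     */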
4629
4630 /* First determine the element size */
4631 len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4632 if (len < 1) {
4633         /* This is the immn == 0, imms == 11111x case */
4634 return false;
4635 }
4636 e = 1 << len;
4637
4638 levels = e - 1;
4639 s = imms & levels;
4640 r = immr & levels;
4641
4642 if (s == levels) {
4643 /* <length of run - 1> mustn't be all-ones. */
4644 return false;
4645 }
4646
4647 /* Create the value of one element: s+1 set bits rotated
4648 * by r within the element (which is e bits wide)...
4649 */
4650 mask = MAKE_64BIT_MASK(0, s + 1);
4651 if (r) {
4652 mask = (mask >> r) | (mask << (e - r));
4653 mask &= MAKE_64BIT_MASK(0, e);
4654 }
4655 /* ...then replicate the element over the whole 64 bit value */
4656 mask = bitfield_replicate(mask, e);
4657 *result = mask;
4658 return true;
4659 }
4660
4661 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4662 void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4663 {
4664 TCGv_i64 tcg_rd, tcg_rn;
4665 uint64_t imm;
4666
4667 /* Some immediate field values are reserved. */
4668 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4669 extract32(a->dbm, 0, 6),
4670 extract32(a->dbm, 6, 6))) {
4671 return false;
4672 }
4673 if (!a->sf) {
4674 imm &= 0xffffffffull;
4675 }
4676
4677 tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4678 tcg_rn = cpu_reg(s, a->rn);
4679
4680 fn(tcg_rd, tcg_rn, imm);
4681 if (set_cc) {
4682 gen_logic_CC(a->sf, tcg_rd);
4683 }
4684 if (!a->sf) {
4685 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4686 }
4687 return true;
4688 }
4689
4690 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4691 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4692 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4693 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4694
4695 /*
4696 * Move wide (immediate)
4697 */
4698
4699 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4700 {
4701 int pos = a->hw << 4;
4702 tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4703 return true;
4704 }
4705
4706 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4707 {
4708 int pos = a->hw << 4;
4709 uint64_t imm = a->imm;
4710
4711 imm = ~(imm << pos);
4712 if (!a->sf) {
4713 imm = (uint32_t)imm;
4714 }
4715 tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4716 return true;
4717 }
4718
4719 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4720 {
4721 int pos = a->hw << 4;
4722 TCGv_i64 tcg_rd, tcg_im;
4723
4724 tcg_rd = cpu_reg(s, a->rd);
4725 tcg_im = tcg_constant_i64(a->imm);
4726 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4727 if (!a->sf) {
4728 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4729 }
4730 return true;
4731 }
4732
4733 /*
4734 * Bitfield
4735 */
4736
4737 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4738 {
4739 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4740 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4741 unsigned int bitsize = a->sf ? 64 : 32;
4742 unsigned int ri = a->immr;
4743 unsigned int si = a->imms;
4744 unsigned int pos, len;
4745
4746 if (si >= ri) {
4747 /* Wd<s-r:0> = Wn<s:r> */
4748 len = (si - ri) + 1;
4749 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4750 if (!a->sf) {
4751 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4752 }
4753 } else {
4754 /* Wd<32+s-r,32-r> = Wn<s:0> */
4755 len = si + 1;
4756 pos = (bitsize - ri) & (bitsize - 1);
4757
4758 if (len < ri) {
4759 /*
4760 * Sign extend the destination field from len to fill the
4761 * balance of the word. Let the deposit below insert all
4762 * of those sign bits.
4763 */
4764 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4765 len = ri;
4766 }
4767
4768 /*
4769 * We start with zero, and we haven't modified any bits outside
4770          * bitsize, therefore no final zero-extension is needed for !sf.
4771 */
4772 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4773 }
4774 return true;
4775 }
4776
4777 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4778 {
4779 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4780 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4781 unsigned int bitsize = a->sf ? 64 : 32;
4782 unsigned int ri = a->immr;
4783 unsigned int si = a->imms;
4784 unsigned int pos, len;
4785
4789 if (si >= ri) {
4790 /* Wd<s-r:0> = Wn<s:r> */
4791 len = (si - ri) + 1;
4792 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4793 } else {
4794 /* Wd<32+s-r,32-r> = Wn<s:0> */
4795 len = si + 1;
4796 pos = (bitsize - ri) & (bitsize - 1);
4797 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4798 }
4799 return true;
4800 }
4801
4802 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4803 {
4804 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4805 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4806 unsigned int bitsize = a->sf ? 64 : 32;
4807 unsigned int ri = a->immr;
4808 unsigned int si = a->imms;
4809 unsigned int pos, len;
4810
4814 if (si >= ri) {
4815 /* Wd<s-r:0> = Wn<s:r> */
4816 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4817 len = (si - ri) + 1;
4818 pos = 0;
4819 } else {
4820 /* Wd<32+s-r,32-r> = Wn<s:0> */
4821 len = si + 1;
4822 pos = (bitsize - ri) & (bitsize - 1);
4823 }
4824
4825 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4826 if (!a->sf) {
4827 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4828 }
4829 return true;
4830 }
4831
4832 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4833 {
4834 TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4835
4836 tcg_rd = cpu_reg(s, a->rd);
4837
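    /*
     * For example, EXTR X0, X1, X2, #8 computes
     * X0 = (X2 >> 8) | (X1 << 56); when Rn == Rm this degenerates into
     * a rotate right, which the 32-bit path below emits directly as
     * tcg_gen_rotri_i32.
     */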
4838 if (unlikely(a->imm == 0)) {
4839 /*
4840 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4841 * so an extract from bit 0 is a special case.
4842 */
4843 if (a->sf) {
4844 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4845 } else {
4846 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4847 }
4848 } else {
4849 tcg_rm = cpu_reg(s, a->rm);
4850 tcg_rn = cpu_reg(s, a->rn);
4851
4852 if (a->sf) {
4853 /* Specialization to ROR happens in EXTRACT2. */
4854 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4855 } else {
4856 TCGv_i32 t0 = tcg_temp_new_i32();
4857
4858 tcg_gen_extrl_i64_i32(t0, tcg_rm);
4859 if (a->rm == a->rn) {
4860 tcg_gen_rotri_i32(t0, t0, a->imm);
4861 } else {
4862 TCGv_i32 t1 = tcg_temp_new_i32();
4863 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4864 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4865 }
4866 tcg_gen_extu_i32_i64(tcg_rd, t0);
4867 }
4868 }
4869 return true;
4870 }
4871
4872 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a)
4873 {
4874 if (fp_access_check(s)) {
4875 int len = (a->len + 1) * 16;
4876
4877 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4878 vec_full_reg_offset(s, a->rm), tcg_env,
4879 a->q ? 16 : 8, vec_full_reg_size(s),
4880 (len << 6) | (a->tbx << 5) | a->rn,
4881 gen_helper_simd_tblx);
4882 }
4883 return true;
4884 }
4885
4886 typedef int simd_permute_idx_fn(int i, int part, int elements);
4887
4888 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
4889 simd_permute_idx_fn *fn, int part)
4890 {
4891 MemOp esz = a->esz;
4892 int datasize = a->q ? 16 : 8;
4893 int elements = datasize >> esz;
4894 TCGv_i64 tcg_res[2], tcg_ele;
4895
4896 if (esz == MO_64 && !a->q) {
4897 return false;
4898 }
4899 if (!fp_access_check(s)) {
4900 return true;
4901 }
4902
4903 tcg_res[0] = tcg_temp_new_i64();
4904 tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
4905 tcg_ele = tcg_temp_new_i64();
4906
4907 for (int i = 0; i < elements; i++) {
4908 int o, w, idx;
4909
4910 idx = fn(i, part, elements);
4911 read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn),
4912 idx & (elements - 1), esz);
4913
4914 w = (i << (esz + 3)) / 64;
4915 o = (i << (esz + 3)) % 64;
4916 if (o == 0) {
4917 tcg_gen_mov_i64(tcg_res[w], tcg_ele);
4918 } else {
4919 tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz);
4920 }
4921 }
4922
4923 for (int i = a->q; i >= 0; --i) {
4924 write_vec_element(s, tcg_res[i], a->rd, i, MO_64);
4925 }
4926 clear_vec_high(s, a->q, a->rd);
4927 return true;
4928 }
4929
4930 static int permute_load_uzp(int i, int part, int elements)
4931 {
4932 return 2 * i + part;
4933 }
4934
4935 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0)
4936 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1)
4937
4938 static int permute_load_trn(int i, int part, int elements)
4939 {
4940 return (i & 1) * elements + (i & ~1) + part;
4941 }
4942
4943 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0)
4944 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1)
4945
4946 static int permute_load_zip(int i, int part, int elements)
4947 {
4948 return (i & 1) * elements + ((part * elements + i) >> 1);
4949 }
4950
4951 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0)
4952 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1)
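/*
 * Illustration of the index functions above for a 128-bit operation on
 * 32-bit elements (elements == 4); in destination element order:
 *   UZP1: Rn[0], Rn[2], Rm[0], Rm[2]    UZP2: Rn[1], Rn[3], Rm[1], Rm[3]
 *   TRN1: Rn[0], Rm[0], Rn[2], Rm[2]    TRN2: Rn[1], Rm[1], Rn[3], Rm[3]
 *   ZIP1: Rn[0], Rm[0], Rn[1], Rm[1]    ZIP2: Rn[2], Rm[2], Rn[3], Rm[3]
 * where an index with the 'elements' bit set selects from Rm.
 */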
4953
4954 /*
4955  * Cryptographic AES, SHA, SHA512, SHA3, SM3, SM4
4956 */
4957
4958 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
4959 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
4960 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
4961 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
4962
4963 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
4964 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
4965 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
4966 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)
4967
4968 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
4969 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
4970 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
4971
4972 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
4973 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
4974 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
4975
4976 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
4977 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
4978 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
4979 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
4980 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
4981 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
4982 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
4983
4984 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
4985 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
4986
4987 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
4988 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
4989
4990 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
4991 {
4992 if (!dc_isar_feature(aa64_sm3, s)) {
4993 return false;
4994 }
4995 if (fp_access_check(s)) {
4996 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
4997 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
4998 TCGv_i32 tcg_op3 = tcg_temp_new_i32();
4999 TCGv_i32 tcg_res = tcg_temp_new_i32();
5000
5001 read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
5002 read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
5003 read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
5004
5005 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
5006 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
5007 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
5008 tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
5009
5010 /* Clear the whole register first, then store bits [127:96]. */
5011 clear_vec(s, a->rd);
5012 write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
5013 }
5014 return true;
5015 }
5016
5017 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
5018 {
5019 if (fp_access_check(s)) {
5020 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
5021 }
5022 return true;
5023 }
5024 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
5025 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
5026 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
5027 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
5028
5029 static bool trans_XAR(DisasContext *s, arg_XAR *a)
5030 {
5031 if (!dc_isar_feature(aa64_sha3, s)) {
5032 return false;
5033 }
5034 if (fp_access_check(s)) {
5035 gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
5036 vec_full_reg_offset(s, a->rn),
5037 vec_full_reg_offset(s, a->rm), a->imm, 16,
5038 vec_full_reg_size(s));
5039 }
5040 return true;
5041 }
5042
5043 /*
5044 * Advanced SIMD copy
5045 */
5046
5047 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
5048 {
5049 unsigned esz = ctz32(imm);
5050 if (esz <= MO_64) {
5051 *pesz = esz;
5052 *pidx = imm >> (esz + 1);
5053 return true;
5054 }
5055 return false;
5056 }
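/*
 * The imm argument is the AdvSIMD imm5-style encoding: the lowest set bit
 * gives the element size and the bits above it the index.  For example,
 * imm == 0b10110 decodes to esz == MO_16 and idx == 0b101 == 5.
 */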
5057
5058 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
5059 {
5060 MemOp esz;
5061 unsigned idx;
5062
5063 if (!decode_esz_idx(a->imm, &esz, &idx)) {
5064 return false;
5065 }
5066 if (fp_access_check(s)) {
5067 /*
5068 * This instruction just extracts the specified element and
5069 * zero-extends it into the bottom of the destination register.
5070 */
5071 TCGv_i64 tmp = tcg_temp_new_i64();
5072 read_vec_element(s, tmp, a->rn, idx, esz);
5073 write_fp_dreg(s, a->rd, tmp);
5074 }
5075 return true;
5076 }
5077
5078 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
5079 {
5080 MemOp esz;
5081 unsigned idx;
5082
5083 if (!decode_esz_idx(a->imm, &esz, &idx)) {
5084 return false;
5085 }
5086 if (esz == MO_64 && !a->q) {
5087 return false;
5088 }
5089 if (fp_access_check(s)) {
5090 tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
5091 vec_reg_offset(s, a->rn, idx, esz),
5092 a->q ? 16 : 8, vec_full_reg_size(s));
5093 }
5094 return true;
5095 }
5096
5097 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
5098 {
5099 MemOp esz;
5100 unsigned idx;
5101
5102 if (!decode_esz_idx(a->imm, &esz, &idx)) {
5103 return false;
5104 }
5105 if (esz == MO_64 && !a->q) {
5106 return false;
5107 }
5108 if (fp_access_check(s)) {
5109 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5110 a->q ? 16 : 8, vec_full_reg_size(s),
5111 cpu_reg(s, a->rn));
5112 }
5113 return true;
5114 }
5115
5116 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
5117 {
5118 MemOp esz;
5119 unsigned idx;
5120
5121 if (!decode_esz_idx(a->imm, &esz, &idx)) {
5122 return false;
5123 }
5124 if (is_signed) {
5125 if (esz == MO_64 || (esz == MO_32 && !a->q)) {
5126 return false;
5127 }
5128 } else {
5129 if (esz == MO_64 ? !a->q : a->q) {
5130 return false;
5131 }
5132 }
5133 if (fp_access_check(s)) {
5134 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
5135 read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
5136 if (is_signed && !a->q) {
5137 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5138 }
5139 }
5140 return true;
5141 }
5142
5143 TRANS(SMOV, do_smov_umov, a, MO_SIGN)
5144 TRANS(UMOV, do_smov_umov, a, 0)
5145
5146 static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
5147 {
5148 MemOp esz;
5149 unsigned idx;
5150
5151 if (!decode_esz_idx(a->imm, &esz, &idx)) {
5152 return false;
5153 }
5154 if (fp_access_check(s)) {
5155 write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
5156 clear_vec_high(s, true, a->rd);
5157 }
5158 return true;
5159 }
5160
5161 static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
5162 {
5163 MemOp esz;
5164 unsigned didx, sidx;
5165
5166 if (!decode_esz_idx(a->di, &esz, &didx)) {
5167 return false;
5168 }
5169 sidx = a->si >> esz;
5170 if (fp_access_check(s)) {
5171 TCGv_i64 tmp = tcg_temp_new_i64();
5172
5173 read_vec_element(s, tmp, a->rn, sidx, esz);
5174 write_vec_element(s, tmp, a->rd, didx, esz);
5175
5176 /* INS is considered a 128-bit write for SVE. */
5177 clear_vec_high(s, true, a->rd);
5178 }
5179 return true;
5180 }
5181
5182 /*
5183 * Advanced SIMD three same
5184 */
5185
5186 typedef struct FPScalar {
5187 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5188 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5189 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
5190 } FPScalar;
5191
5192 static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
5193 const FPScalar *f, int mergereg,
5194 ARMFPStatusFlavour fpsttype)
5195 {
5196 switch (a->esz) {
5197 case MO_64:
5198 if (fp_access_check(s)) {
5199 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5200 TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5201 f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype));
5202 write_fp_dreg_merging(s, a->rd, mergereg, t0);
5203 }
5204 break;
5205 case MO_32:
5206 if (fp_access_check(s)) {
5207 TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5208 TCGv_i32 t1 = read_fp_sreg(s, a->rm);
5209 f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype));
5210 write_fp_sreg_merging(s, a->rd, mergereg, t0);
5211 }
5212 break;
5213 case MO_16:
5214 if (!dc_isar_feature(aa64_fp16, s)) {
5215 return false;
5216 }
5217 if (fp_access_check(s)) {
5218 TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5219 TCGv_i32 t1 = read_fp_hreg(s, a->rm);
5220 f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype));
5221 write_fp_hreg_merging(s, a->rd, mergereg, t0);
5222 }
5223 break;
5224 default:
5225 return false;
5226 }
5227 return true;
5228 }
5229
5230 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
5231 int mergereg)
5232 {
5233 return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
5234 a->esz == MO_16 ?
5235 FPST_A64_F16 : FPST_A64);
5236 }
5237
5238 static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a,
5239 const FPScalar *fnormal, const FPScalar *fah,
5240 int mergereg)
5241 {
5242 return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal,
5243 mergereg, select_ah_fpst(s, a->esz));
5244 }
5245
5246 /* Some insns need to call different helpers when FPCR.AH == 1 */
5247 static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a,
5248 const FPScalar *fnormal,
5249 const FPScalar *fah,
5250 int mergereg)
5251 {
5252 return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg);
5253 }
5254
5255 static const FPScalar f_scalar_fadd = {
5256 gen_helper_vfp_addh,
5257 gen_helper_vfp_adds,
5258 gen_helper_vfp_addd,
5259 };
5260 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn)
5261
5262 static const FPScalar f_scalar_fsub = {
5263 gen_helper_vfp_subh,
5264 gen_helper_vfp_subs,
5265 gen_helper_vfp_subd,
5266 };
5267 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn)
5268
5269 static const FPScalar f_scalar_fdiv = {
5270 gen_helper_vfp_divh,
5271 gen_helper_vfp_divs,
5272 gen_helper_vfp_divd,
5273 };
5274 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn)
5275
5276 static const FPScalar f_scalar_fmul = {
5277 gen_helper_vfp_mulh,
5278 gen_helper_vfp_muls,
5279 gen_helper_vfp_muld,
5280 };
5281 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn)
5282
5283 static const FPScalar f_scalar_fmax = {
5284 gen_helper_vfp_maxh,
5285 gen_helper_vfp_maxs,
5286 gen_helper_vfp_maxd,
5287 };
5288 static const FPScalar f_scalar_fmax_ah = {
5289 gen_helper_vfp_ah_maxh,
5290 gen_helper_vfp_ah_maxs,
5291 gen_helper_vfp_ah_maxd,
5292 };
5293 TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn)
5294
5295 static const FPScalar f_scalar_fmin = {
5296 gen_helper_vfp_minh,
5297 gen_helper_vfp_mins,
5298 gen_helper_vfp_mind,
5299 };
5300 static const FPScalar f_scalar_fmin_ah = {
5301 gen_helper_vfp_ah_minh,
5302 gen_helper_vfp_ah_mins,
5303 gen_helper_vfp_ah_mind,
5304 };
5305 TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn)
5306
5307 static const FPScalar f_scalar_fmaxnm = {
5308 gen_helper_vfp_maxnumh,
5309 gen_helper_vfp_maxnums,
5310 gen_helper_vfp_maxnumd,
5311 };
5312 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn)
5313
5314 static const FPScalar f_scalar_fminnm = {
5315 gen_helper_vfp_minnumh,
5316 gen_helper_vfp_minnums,
5317 gen_helper_vfp_minnumd,
5318 };
5319 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn)
5320
5321 static const FPScalar f_scalar_fmulx = {
5322 gen_helper_advsimd_mulxh,
5323 gen_helper_vfp_mulxs,
5324 gen_helper_vfp_mulxd,
5325 };
5326 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn)
5327
5328 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5329 {
5330 gen_helper_vfp_mulh(d, n, m, s);
5331 gen_vfp_negh(d, d);
5332 }
5333
5334 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5335 {
5336 gen_helper_vfp_muls(d, n, m, s);
5337 gen_vfp_negs(d, d);
5338 }
5339
5340 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5341 {
5342 gen_helper_vfp_muld(d, n, m, s);
5343 gen_vfp_negd(d, d);
5344 }
5345
5346 static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5347 {
5348 gen_helper_vfp_mulh(d, n, m, s);
5349 gen_vfp_ah_negh(d, d);
5350 }
5351
5352 static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5353 {
5354 gen_helper_vfp_muls(d, n, m, s);
5355 gen_vfp_ah_negs(d, d);
5356 }
5357
5358 static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5359 {
5360 gen_helper_vfp_muld(d, n, m, s);
5361 gen_vfp_ah_negd(d, d);
5362 }
5363
5364 static const FPScalar f_scalar_fnmul = {
5365 gen_fnmul_h,
5366 gen_fnmul_s,
5367 gen_fnmul_d,
5368 };
5369 static const FPScalar f_scalar_ah_fnmul = {
5370 gen_fnmul_ah_h,
5371 gen_fnmul_ah_s,
5372 gen_fnmul_ah_d,
5373 };
5374 TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn)
5375
5376 static const FPScalar f_scalar_fcmeq = {
5377 gen_helper_advsimd_ceq_f16,
5378 gen_helper_neon_ceq_f32,
5379 gen_helper_neon_ceq_f64,
5380 };
5381 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm)
5382
5383 static const FPScalar f_scalar_fcmge = {
5384 gen_helper_advsimd_cge_f16,
5385 gen_helper_neon_cge_f32,
5386 gen_helper_neon_cge_f64,
5387 };
5388 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm)
5389
5390 static const FPScalar f_scalar_fcmgt = {
5391 gen_helper_advsimd_cgt_f16,
5392 gen_helper_neon_cgt_f32,
5393 gen_helper_neon_cgt_f64,
5394 };
5395 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm)
5396
5397 static const FPScalar f_scalar_facge = {
5398 gen_helper_advsimd_acge_f16,
5399 gen_helper_neon_acge_f32,
5400 gen_helper_neon_acge_f64,
5401 };
5402 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm)
5403
5404 static const FPScalar f_scalar_facgt = {
5405 gen_helper_advsimd_acgt_f16,
5406 gen_helper_neon_acgt_f32,
5407 gen_helper_neon_acgt_f64,
5408 };
5409 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm)
5410
5411 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5412 {
5413 gen_helper_vfp_subh(d, n, m, s);
5414 gen_vfp_absh(d, d);
5415 }
5416
5417 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5418 {
5419 gen_helper_vfp_subs(d, n, m, s);
5420 gen_vfp_abss(d, d);
5421 }
5422
5423 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5424 {
5425 gen_helper_vfp_subd(d, n, m, s);
5426 gen_vfp_absd(d, d);
5427 }
5428
5429 static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5430 {
5431 gen_helper_vfp_subh(d, n, m, s);
5432 gen_vfp_ah_absh(d, d);
5433 }
5434
5435 static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5436 {
5437 gen_helper_vfp_subs(d, n, m, s);
5438 gen_vfp_ah_abss(d, d);
5439 }
5440
5441 static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5442 {
5443 gen_helper_vfp_subd(d, n, m, s);
5444 gen_vfp_ah_absd(d, d);
5445 }
5446
5447 static const FPScalar f_scalar_fabd = {
5448 gen_fabd_h,
5449 gen_fabd_s,
5450 gen_fabd_d,
5451 };
5452 static const FPScalar f_scalar_ah_fabd = {
5453 gen_fabd_ah_h,
5454 gen_fabd_ah_s,
5455 gen_fabd_ah_d,
5456 };
5457 TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn)
5458
5459 static const FPScalar f_scalar_frecps = {
5460 gen_helper_recpsf_f16,
5461 gen_helper_recpsf_f32,
5462 gen_helper_recpsf_f64,
5463 };
5464 static const FPScalar f_scalar_ah_frecps = {
5465 gen_helper_recpsf_ah_f16,
5466 gen_helper_recpsf_ah_f32,
5467 gen_helper_recpsf_ah_f64,
5468 };
5469 TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a,
5470 &f_scalar_frecps, &f_scalar_ah_frecps, a->rn)
5471
5472 static const FPScalar f_scalar_frsqrts = {
5473 gen_helper_rsqrtsf_f16,
5474 gen_helper_rsqrtsf_f32,
5475 gen_helper_rsqrtsf_f64,
5476 };
5477 static const FPScalar f_scalar_ah_frsqrts = {
5478 gen_helper_rsqrtsf_ah_f16,
5479 gen_helper_rsqrtsf_ah_f32,
5480 gen_helper_rsqrtsf_ah_f64,
5481 };
5482 TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a,
5483 &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn)
5484
5485 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
5486 const FPScalar *f, bool swap)
5487 {
5488 switch (a->esz) {
5489 case MO_64:
5490 if (fp_access_check(s)) {
5491 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5492 TCGv_i64 t1 = tcg_constant_i64(0);
5493 if (swap) {
5494 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64));
5495 } else {
5496 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
5497 }
5498 write_fp_dreg(s, a->rd, t0);
5499 }
5500 break;
5501 case MO_32:
5502 if (fp_access_check(s)) {
5503 TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5504 TCGv_i32 t1 = tcg_constant_i32(0);
5505 if (swap) {
5506 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64));
5507 } else {
5508 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
5509 }
5510 write_fp_sreg(s, a->rd, t0);
5511 }
5512 break;
5513 case MO_16:
5514 if (!dc_isar_feature(aa64_fp16, s)) {
5515 return false;
5516 }
5517 if (fp_access_check(s)) {
5518 TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5519 TCGv_i32 t1 = tcg_constant_i32(0);
5520 if (swap) {
5521 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16));
5522 } else {
5523 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
5524 }
5525 write_fp_sreg(s, a->rd, t0);
5526 }
5527 break;
5528 default:
5529 return false;
5530 }
5531 return true;
5532 }
5533
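/*
 * FCMLT0 and FCMLE0 have no helpers of their own: they reuse the
 * greater-than / greater-equal helpers with the operands swapped,
 * since "a < 0" is equivalent to "0 > a".
 */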
5534 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false)
5535 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false)
5536 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false)
5537 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true)
5538 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true)
5539
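/*
 * Scalar saturating add/sub: the cumulative saturation (QC) flag is
 * loaded from vfp.qc, updated by the generator function, and written
 * back once the result has been stored.
 */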
5540 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a,
5541 MemOp sgn_n, MemOp sgn_m,
5542 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp),
5543 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
5544 {
5545 TCGv_i64 t0, t1, t2, qc;
5546 MemOp esz = a->esz;
5547
5548 if (!fp_access_check(s)) {
5549 return true;
5550 }
5551
5552 t0 = tcg_temp_new_i64();
5553 t1 = tcg_temp_new_i64();
5554 t2 = tcg_temp_new_i64();
5555 qc = tcg_temp_new_i64();
5556 read_vec_element(s, t1, a->rn, 0, esz | sgn_n);
5557 read_vec_element(s, t2, a->rm, 0, esz | sgn_m);
5558 tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5559
5560 if (esz == MO_64) {
5561 gen_d(t0, qc, t1, t2);
5562 } else {
5563 gen_bhs(t0, qc, t1, t2, esz);
5564 tcg_gen_ext_i64(t0, t0, esz);
5565 }
5566
5567 write_fp_dreg(s, a->rd, t0);
5568 tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5569 return true;
5570 }
5571
5572 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d)
5573 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d)
5574 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d)
5575 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d)
5576 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d)
5577 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d)
5578
5579 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a,
5580 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
5581 {
5582 if (fp_access_check(s)) {
5583 TCGv_i64 t0 = tcg_temp_new_i64();
5584 TCGv_i64 t1 = tcg_temp_new_i64();
5585
5586 read_vec_element(s, t0, a->rn, 0, MO_64);
5587 read_vec_element(s, t1, a->rm, 0, MO_64);
5588 fn(t0, t0, t1);
5589 write_fp_dreg(s, a->rd, t0);
5590 }
5591 return true;
5592 }
5593
5594 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64)
5595 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64)
5596 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64)
5597 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64)
5598 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64)
5599 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64)
5600
5601 typedef struct ENVScalar2 {
5602 NeonGenTwoOpEnvFn *gen_bhs[3];
5603 NeonGenTwo64OpEnvFn *gen_d;
5604 } ENVScalar2;
5605
5606 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f)
5607 {
5608 if (!fp_access_check(s)) {
5609 return true;
5610 }
5611 if (a->esz == MO_64) {
5612 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5613 TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5614 f->gen_d(t0, tcg_env, t0, t1);
5615 write_fp_dreg(s, a->rd, t0);
5616 } else {
5617 TCGv_i32 t0 = tcg_temp_new_i32();
5618 TCGv_i32 t1 = tcg_temp_new_i32();
5619
5620 read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5621 read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5622 f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
5623 write_fp_sreg(s, a->rd, t0);
5624 }
5625 return true;
5626 }
5627
5628 static const ENVScalar2 f_scalar_sqshl = {
5629 { gen_helper_neon_qshl_s8,
5630 gen_helper_neon_qshl_s16,
5631 gen_helper_neon_qshl_s32 },
5632 gen_helper_neon_qshl_s64,
5633 };
5634 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl)
5635
5636 static const ENVScalar2 f_scalar_uqshl = {
5637 { gen_helper_neon_qshl_u8,
5638 gen_helper_neon_qshl_u16,
5639 gen_helper_neon_qshl_u32 },
5640 gen_helper_neon_qshl_u64,
5641 };
5642 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl)
5643
5644 static const ENVScalar2 f_scalar_sqrshl = {
5645 { gen_helper_neon_qrshl_s8,
5646 gen_helper_neon_qrshl_s16,
5647 gen_helper_neon_qrshl_s32 },
5648 gen_helper_neon_qrshl_s64,
5649 };
5650 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl)
5651
5652 static const ENVScalar2 f_scalar_uqrshl = {
5653 { gen_helper_neon_qrshl_u8,
5654 gen_helper_neon_qrshl_u16,
5655 gen_helper_neon_qrshl_u32 },
5656 gen_helper_neon_qrshl_u64,
5657 };
5658 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl)
5659
5660 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a,
5661 const ENVScalar2 *f)
5662 {
5663 if (a->esz == MO_16 || a->esz == MO_32) {
5664 return do_env_scalar2(s, a, f);
5665 }
5666 return false;
5667 }
5668
5669 static const ENVScalar2 f_scalar_sqdmulh = {
5670 { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 }
5671 };
5672 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh)
5673
5674 static const ENVScalar2 f_scalar_sqrdmulh = {
5675 { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 }
5676 };
5677 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
5678
5679 typedef struct ENVScalar3 {
5680 NeonGenThreeOpEnvFn *gen_hs[2];
5681 } ENVScalar3;
5682
5683 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
5684 const ENVScalar3 *f)
5685 {
5686 TCGv_i32 t0, t1, t2;
5687
5688 if (a->esz != MO_16 && a->esz != MO_32) {
5689 return false;
5690 }
5691 if (!fp_access_check(s)) {
5692 return true;
5693 }
5694
5695 t0 = tcg_temp_new_i32();
5696 t1 = tcg_temp_new_i32();
5697 t2 = tcg_temp_new_i32();
5698 read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5699 read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5700 read_vec_element_i32(s, t2, a->rd, 0, a->esz);
5701 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
5702 write_fp_sreg(s, a->rd, t0);
5703 return true;
5704 }
5705
5706 static const ENVScalar3 f_scalar_sqrdmlah = {
5707 { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
5708 };
5709 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
5710
5711 static const ENVScalar3 f_scalar_sqrdmlsh = {
5712 { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
5713 };
5714 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
5715
5716 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
5717 {
5718 if (fp_access_check(s)) {
5719 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5720 TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5721 tcg_gen_negsetcond_i64(cond, t0, t0, t1);
5722 write_fp_dreg(s, a->rd, t0);
5723 }
5724 return true;
5725 }
5726
5727 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT)
5728 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU)
5729 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE)
5730 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
5731 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
5732 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
5733
5734 static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a,
5735 int data,
5736 gen_helper_gvec_3_ptr * const fns[3],
5737 ARMFPStatusFlavour fpsttype)
5738 {
5739 MemOp esz = a->esz;
5740 int check = fp_access_check_vector_hsd(s, a->q, esz);
5741
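    /*
     * fp_access_check_vector_hsd() returns a negative value for an
     * unallocated size/Q combination and 0 if the FP access check failed,
     * in which case the exception has already been raised.
     */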
5742 if (check <= 0) {
5743 return check == 0;
5744 }
5745
5746 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype,
5747 data, fns[esz - 1]);
5748 return true;
5749 }
5750
5751 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
5752 gen_helper_gvec_3_ptr * const fns[3])
5753 {
5754 return do_fp3_vector_with_fpsttype(s, a, data, fns,
5755 a->esz == MO_16 ?
5756 FPST_A64_F16 : FPST_A64);
5757 }
5758
5759 static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data,
5760 gen_helper_gvec_3_ptr * const fnormal[3],
5761 gen_helper_gvec_3_ptr * const fah[3])
5762 {
5763 return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal);
5764 }
5765
5766 static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data,
5767 gen_helper_gvec_3_ptr * const fnormal[3],
5768 gen_helper_gvec_3_ptr * const fah[3])
5769 {
5770 return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal,
5771 select_ah_fpst(s, a->esz));
5772 }
5773
5774 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
5775 gen_helper_gvec_fadd_h,
5776 gen_helper_gvec_fadd_s,
5777 gen_helper_gvec_fadd_d,
5778 };
5779 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)
5780
5781 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
5782 gen_helper_gvec_fsub_h,
5783 gen_helper_gvec_fsub_s,
5784 gen_helper_gvec_fsub_d,
5785 };
5786 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)
5787
5788 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
5789 gen_helper_gvec_fdiv_h,
5790 gen_helper_gvec_fdiv_s,
5791 gen_helper_gvec_fdiv_d,
5792 };
5793 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)
5794
5795 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
5796 gen_helper_gvec_fmul_h,
5797 gen_helper_gvec_fmul_s,
5798 gen_helper_gvec_fmul_d,
5799 };
5800 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)
5801
5802 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
5803 gen_helper_gvec_fmax_h,
5804 gen_helper_gvec_fmax_s,
5805 gen_helper_gvec_fmax_d,
5806 };
5807 static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = {
5808 gen_helper_gvec_ah_fmax_h,
5809 gen_helper_gvec_ah_fmax_s,
5810 gen_helper_gvec_ah_fmax_d,
5811 };
5812 TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah)
5813
5814 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
5815 gen_helper_gvec_fmin_h,
5816 gen_helper_gvec_fmin_s,
5817 gen_helper_gvec_fmin_d,
5818 };
5819 static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = {
5820 gen_helper_gvec_ah_fmin_h,
5821 gen_helper_gvec_ah_fmin_s,
5822 gen_helper_gvec_ah_fmin_d,
5823 };
5824 TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah)
5825
5826 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
5827 gen_helper_gvec_fmaxnum_h,
5828 gen_helper_gvec_fmaxnum_s,
5829 gen_helper_gvec_fmaxnum_d,
5830 };
5831 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)
5832
5833 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
5834 gen_helper_gvec_fminnum_h,
5835 gen_helper_gvec_fminnum_s,
5836 gen_helper_gvec_fminnum_d,
5837 };
5838 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)
5839
5840 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
5841 gen_helper_gvec_fmulx_h,
5842 gen_helper_gvec_fmulx_s,
5843 gen_helper_gvec_fmulx_d,
5844 };
5845 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)
5846
5847 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
5848 gen_helper_gvec_vfma_h,
5849 gen_helper_gvec_vfma_s,
5850 gen_helper_gvec_vfma_d,
5851 };
5852 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)
5853
5854 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
5855 gen_helper_gvec_vfms_h,
5856 gen_helper_gvec_vfms_s,
5857 gen_helper_gvec_vfms_d,
5858 };
5859 static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = {
5860 gen_helper_gvec_ah_vfms_h,
5861 gen_helper_gvec_ah_vfms_s,
5862 gen_helper_gvec_ah_vfms_d,
5863 };
5864 TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah)
5865
5866 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
5867 gen_helper_gvec_fceq_h,
5868 gen_helper_gvec_fceq_s,
5869 gen_helper_gvec_fceq_d,
5870 };
5871 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)
5872
5873 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
5874 gen_helper_gvec_fcge_h,
5875 gen_helper_gvec_fcge_s,
5876 gen_helper_gvec_fcge_d,
5877 };
5878 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)
5879
5880 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
5881 gen_helper_gvec_fcgt_h,
5882 gen_helper_gvec_fcgt_s,
5883 gen_helper_gvec_fcgt_d,
5884 };
5885 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)
5886
5887 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
5888 gen_helper_gvec_facge_h,
5889 gen_helper_gvec_facge_s,
5890 gen_helper_gvec_facge_d,
5891 };
5892 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)
5893
5894 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
5895 gen_helper_gvec_facgt_h,
5896 gen_helper_gvec_facgt_s,
5897 gen_helper_gvec_facgt_d,
5898 };
5899 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt)
5900
5901 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
5902 gen_helper_gvec_fabd_h,
5903 gen_helper_gvec_fabd_s,
5904 gen_helper_gvec_fabd_d,
5905 };
5906 static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = {
5907 gen_helper_gvec_ah_fabd_h,
5908 gen_helper_gvec_ah_fabd_s,
5909 gen_helper_gvec_ah_fabd_d,
5910 };
5911 TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd)
5912
5913 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
5914 gen_helper_gvec_recps_h,
5915 gen_helper_gvec_recps_s,
5916 gen_helper_gvec_recps_d,
5917 };
5918 static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = {
5919 gen_helper_gvec_ah_recps_h,
5920 gen_helper_gvec_ah_recps_s,
5921 gen_helper_gvec_ah_recps_d,
5922 };
5923 TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps)
5924
5925 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
5926 gen_helper_gvec_rsqrts_h,
5927 gen_helper_gvec_rsqrts_s,
5928 gen_helper_gvec_rsqrts_d,
5929 };
5930 static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = {
5931 gen_helper_gvec_ah_rsqrts_h,
5932 gen_helper_gvec_ah_rsqrts_s,
5933 gen_helper_gvec_ah_rsqrts_d,
5934 };
5935 TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts)
5936
5937 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
5938 gen_helper_gvec_faddp_h,
5939 gen_helper_gvec_faddp_s,
5940 gen_helper_gvec_faddp_d,
5941 };
5942 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp)
5943
5944 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
5945 gen_helper_gvec_fmaxp_h,
5946 gen_helper_gvec_fmaxp_s,
5947 gen_helper_gvec_fmaxp_d,
5948 };
5949 static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = {
5950 gen_helper_gvec_ah_fmaxp_h,
5951 gen_helper_gvec_ah_fmaxp_s,
5952 gen_helper_gvec_ah_fmaxp_d,
5953 };
5954 TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp)
5955
5956 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
5957 gen_helper_gvec_fminp_h,
5958 gen_helper_gvec_fminp_s,
5959 gen_helper_gvec_fminp_d,
5960 };
5961 static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = {
5962 gen_helper_gvec_ah_fminp_h,
5963 gen_helper_gvec_ah_fminp_s,
5964 gen_helper_gvec_ah_fminp_d,
5965 };
5966 TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp)
5967
5968 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
5969 gen_helper_gvec_fmaxnump_h,
5970 gen_helper_gvec_fmaxnump_s,
5971 gen_helper_gvec_fmaxnump_d,
5972 };
5973 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp)
5974
5975 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
5976 gen_helper_gvec_fminnump_h,
5977 gen_helper_gvec_fminnump_s,
5978 gen_helper_gvec_fminnump_d,
5979 };
5980 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
5981
5982 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
5983 {
5984 if (fp_access_check(s)) {
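        /*
         * data bit 0 requests the subtracting (FMLSL) form and bit 1 the
         * "2" form that reads the high half of the inputs.
         */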
5985 int data = (is_2 << 1) | is_s;
5986 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
5987 vec_full_reg_offset(s, a->rn),
5988 vec_full_reg_offset(s, a->rm), tcg_env,
5989 a->q ? 16 : 8, vec_full_reg_size(s),
5990 data, gen_helper_gvec_fmlal_a64);
5991 }
5992 return true;
5993 }
5994
5995 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
5996 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
5997 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
5998 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
5999
6000 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
6001 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
6002 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
6003 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
6004 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)
6005
6006 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
6007 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
6008 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
6009 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
6010 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
6011
6012 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
6013 {
6014 if (fp_access_check(s)) {
6015 gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
6016 }
6017 return true;
6018 }
6019
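/*
 * BSL, BIT and BIF all perform the same bitwise select and differ only
 * in which operand supplies the selection mask and which two supply the
 * data, hence the differing argument orders below.
 */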
6020 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
6021 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
6022 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
6023
6024 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc)
6025 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc)
6026 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc)
6027 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc)
6028 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc)
6029 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc)
6030
6031 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl)
6032 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl)
6033 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl)
6034 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl)
6035 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl)
6036 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl)
6037 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl)
6038 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl)
6039
6040 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add)
6041 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub)
6042 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd)
6043 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd)
6044 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub)
6045 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub)
6046 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd)
6047 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd)
6048 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax)
6049 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax)
6050 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin)
6051 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin)
6052 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba)
6053 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba)
6054 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd)
6055 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd)
6056 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul)
6057 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b)
6058 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla)
6059 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls)
6060
6061 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond)
6062 {
6063 if (a->esz == MO_64 && !a->q) {
6064 return false;
6065 }
6066 if (fp_access_check(s)) {
6067 tcg_gen_gvec_cmp(cond, a->esz,
6068 vec_full_reg_offset(s, a->rd),
6069 vec_full_reg_offset(s, a->rn),
6070 vec_full_reg_offset(s, a->rm),
6071 a->q ? 16 : 8, vec_full_reg_size(s));
6072 }
6073 return true;
6074 }
6075
6076 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT)
6077 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU)
6078 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE)
6079 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
6080 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
6081 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
6082
6083 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
6084 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
6085 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
6086 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
6087
6088 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
6089 gen_helper_gvec_4 *fn)
6090 {
6091 if (fp_access_check(s)) {
6092 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
6093 }
6094 return true;
6095 }
6096
6097 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
6098 gen_helper_gvec_4_ptr *fn)
6099 {
6100 if (fp_access_check(s)) {
6101 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
6102 }
6103 return true;
6104 }
6105
6106 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
6107 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
6108 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
6109 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
6110 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
6111 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
6112 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
6113 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
6114
6115 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
6116 {
6117 if (!dc_isar_feature(aa64_bf16, s)) {
6118 return false;
6119 }
6120 if (fp_access_check(s)) {
6121 /* Q bit selects BFMLALB vs BFMLALT. */
6122 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
6123 s->fpcr_ah ? FPST_AH : FPST_A64, a->q,
6124 gen_helper_gvec_bfmlal);
6125 }
6126 return true;
6127 }
6128
6129 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
6130 gen_helper_gvec_fcaddh,
6131 gen_helper_gvec_fcadds,
6132 gen_helper_gvec_fcaddd,
6133 };
6134 /*
6135 * Encode FPCR.AH into the data so the helper knows whether the
6136 * negations it does should avoid flipping the sign bit on a NaN
6137 */
6138 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1),
6139 f_vector_fcadd)
6140 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1),
6141 f_vector_fcadd)
6142
6143 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
6144 {
6145 static gen_helper_gvec_4_ptr * const fn[] = {
6146 [MO_16] = gen_helper_gvec_fcmlah,
6147 [MO_32] = gen_helper_gvec_fcmlas,
6148 [MO_64] = gen_helper_gvec_fcmlad,
6149 };
6150 int check;
6151
6152 if (!dc_isar_feature(aa64_fcma, s)) {
6153 return false;
6154 }
6155
6156 check = fp_access_check_vector_hsd(s, a->q, a->esz);
6157 if (check <= 0) {
6158 return check == 0;
6159 }
6160
6161 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6162 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6163 a->rot | (s->fpcr_ah << 2), fn[a->esz]);
6164 return true;
6165 }
6166
6167 /*
6168 * Widening vector x vector/indexed.
6169 *
6170 * These read from the top or bottom half of a 128-bit vector.
6171 * After widening, optionally accumulate with a 128-bit vector.
6172 * Implement these inline, as the number of elements is limited
6173 * and the related SVE and SME operations on larger vectors use
6174 * even/odd elements instead of top/bottom half.
6175 *
6176 * If idx >= 0, operand 2 is indexed, otherwise vector.
6177 * If acc, operand 0 is loaded with rd.
6178 */
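/*
 * For example, SMLAL (by vector, esz = MO_16) reads four 16-bit elements
 * from the selected half of Vn and Vm, widens each product to 32 bits and
 * accumulates into the four 32-bit elements of Vd.
 */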
6179
6180 /* For low half, iterating up. */
6181 static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
6182 int rd, int rn, int rm, int idx,
6183 NeonGenTwo64OpFn *fn, bool acc)
6184 {
6185 TCGv_i64 tcg_op0 = tcg_temp_new_i64();
6186 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6187 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6188 MemOp esz = memop & MO_SIZE;
6189 int half = 8 >> esz;
6190 int top_swap, top_half;
6191
6192 /* There are no 64x64->128 bit operations. */
6193 if (esz >= MO_64) {
6194 return false;
6195 }
6196 if (!fp_access_check(s)) {
6197 return true;
6198 }
6199
6200 if (idx >= 0) {
6201 read_vec_element(s, tcg_op2, rm, idx, memop);
6202 }
6203
6204 /*
6205 * For top half inputs, iterate forward; backward for bottom half.
6206 * This means the store to the destination will not occur until
6207 * overlapping inputs are consumed.
6208 * Use top_swap to conditionally invert the forward iteration index.
6209 */
6210 top_swap = top ? 0 : half - 1;
6211 top_half = top ? half : 0;
6212
6213 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6214 int elt = elt_fwd ^ top_swap;
6215
6216 read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
6217 if (idx < 0) {
6218 read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
6219 }
6220 if (acc) {
6221 read_vec_element(s, tcg_op0, rd, elt, memop + 1);
6222 }
6223 fn(tcg_op0, tcg_op1, tcg_op2);
6224 write_vec_element(s, tcg_op0, rd, elt, esz + 1);
6225 }
6226 clear_vec_high(s, 1, rd);
6227 return true;
6228 }
6229
6230 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6231 {
6232 TCGv_i64 t = tcg_temp_new_i64();
6233 tcg_gen_mul_i64(t, n, m);
6234 tcg_gen_add_i64(d, d, t);
6235 }
6236
6237 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6238 {
6239 TCGv_i64 t = tcg_temp_new_i64();
6240 tcg_gen_mul_i64(t, n, m);
6241 tcg_gen_sub_i64(d, d, t);
6242 }
6243
6244 TRANS(SMULL_v, do_3op_widening,
6245 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6246 tcg_gen_mul_i64, false)
6247 TRANS(UMULL_v, do_3op_widening,
6248 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6249 tcg_gen_mul_i64, false)
6250 TRANS(SMLAL_v, do_3op_widening,
6251 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6252 gen_muladd_i64, true)
6253 TRANS(UMLAL_v, do_3op_widening,
6254 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6255 gen_muladd_i64, true)
6256 TRANS(SMLSL_v, do_3op_widening,
6257 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6258 gen_mulsub_i64, true)
6259 TRANS(UMLSL_v, do_3op_widening,
6260 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6261 gen_mulsub_i64, true)
6262
6263 TRANS(SMULL_vi, do_3op_widening,
6264 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6265 tcg_gen_mul_i64, false)
6266 TRANS(UMULL_vi, do_3op_widening,
6267 a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6268 tcg_gen_mul_i64, false)
6269 TRANS(SMLAL_vi, do_3op_widening,
6270 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6271 gen_muladd_i64, true)
6272 TRANS(UMLAL_vi, do_3op_widening,
6273 a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6274 gen_muladd_i64, true)
6275 TRANS(SMLSL_vi, do_3op_widening,
6276 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6277 gen_mulsub_i64, true)
6278 TRANS(UMLSL_vi, do_3op_widening,
6279 a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6280 gen_mulsub_i64, true)
6281
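/* Absolute difference: d = (n >= m) ? n - m : m - n, signed or unsigned. */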
6282 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6283 {
6284 TCGv_i64 t1 = tcg_temp_new_i64();
6285 TCGv_i64 t2 = tcg_temp_new_i64();
6286
6287 tcg_gen_sub_i64(t1, n, m);
6288 tcg_gen_sub_i64(t2, m, n);
6289 tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
6290 }
6291
6292 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6293 {
6294 TCGv_i64 t1 = tcg_temp_new_i64();
6295 TCGv_i64 t2 = tcg_temp_new_i64();
6296
6297 tcg_gen_sub_i64(t1, n, m);
6298 tcg_gen_sub_i64(t2, m, n);
6299 tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
6300 }
6301
6302 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6303 {
6304 TCGv_i64 t = tcg_temp_new_i64();
6305 gen_sabd_i64(t, n, m);
6306 tcg_gen_add_i64(d, d, t);
6307 }
6308
6309 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6310 {
6311 TCGv_i64 t = tcg_temp_new_i64();
6312 gen_uabd_i64(t, n, m);
6313 tcg_gen_add_i64(d, d, t);
6314 }
6315
6316 TRANS(SADDL_v, do_3op_widening,
6317 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6318 tcg_gen_add_i64, false)
6319 TRANS(UADDL_v, do_3op_widening,
6320 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6321 tcg_gen_add_i64, false)
6322 TRANS(SSUBL_v, do_3op_widening,
6323 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6324 tcg_gen_sub_i64, false)
6325 TRANS(USUBL_v, do_3op_widening,
6326 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6327 tcg_gen_sub_i64, false)
6328 TRANS(SABDL_v, do_3op_widening,
6329 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6330 gen_sabd_i64, false)
6331 TRANS(UABDL_v, do_3op_widening,
6332 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6333 gen_uabd_i64, false)
6334 TRANS(SABAL_v, do_3op_widening,
6335 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6336 gen_saba_i64, true)
6337 TRANS(UABAL_v, do_3op_widening,
6338 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6339 gen_uaba_i64, true)
6340
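/*
 * Signed saturating doubling multiply (long): the doubling of the widened
 * product is done as a saturating self-addition, which also sets QC if
 * the doubling overflows.
 */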
6341 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6342 {
6343 tcg_gen_mul_i64(d, n, m);
6344 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
6345 }
6346
6347 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6348 {
6349 tcg_gen_mul_i64(d, n, m);
6350 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
6351 }
6352
6353 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6354 {
6355 TCGv_i64 t = tcg_temp_new_i64();
6356
6357 tcg_gen_mul_i64(t, n, m);
6358 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6359 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6360 }
6361
6362 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6363 {
6364 TCGv_i64 t = tcg_temp_new_i64();
6365
6366 tcg_gen_mul_i64(t, n, m);
6367 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6368 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6369 }
6370
6371 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6372 {
6373 TCGv_i64 t = tcg_temp_new_i64();
6374
6375 tcg_gen_mul_i64(t, n, m);
6376 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6377 tcg_gen_neg_i64(t, t);
6378 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6379 }
6380
6381 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6382 {
6383 TCGv_i64 t = tcg_temp_new_i64();
6384
6385 tcg_gen_mul_i64(t, n, m);
6386 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6387 tcg_gen_neg_i64(t, t);
6388 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6389 }
6390
6391 TRANS(SQDMULL_v, do_3op_widening,
6392 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6393 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6394 TRANS(SQDMLAL_v, do_3op_widening,
6395 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6396 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6397 TRANS(SQDMLSL_v, do_3op_widening,
6398 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6399 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6400
6401 TRANS(SQDMULL_vi, do_3op_widening,
6402 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6403 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6404 TRANS(SQDMLAL_vi, do_3op_widening,
6405 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6406 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6407 TRANS(SQDMLSL_vi, do_3op_widening,
6408 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6409 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6410
6411 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
6412 MemOp sign, bool sub)
6413 {
6414 TCGv_i64 tcg_op0, tcg_op1;
6415 MemOp esz = a->esz;
6416 int half = 8 >> esz;
6417 bool top = a->q;
6418 int top_swap = top ? 0 : half - 1;
6419 int top_half = top ? half : 0;
6420
6421 /* There are no 64x64->128 bit operations. */
6422 if (esz >= MO_64) {
6423 return false;
6424 }
6425 if (!fp_access_check(s)) {
6426 return true;
6427 }
6428 tcg_op0 = tcg_temp_new_i64();
6429 tcg_op1 = tcg_temp_new_i64();
6430
6431 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6432 int elt = elt_fwd ^ top_swap;
6433
6434 read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
6435 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6436 if (sub) {
6437 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6438 } else {
6439 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6440 }
6441 write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
6442 }
6443 clear_vec_high(s, 1, a->rd);
6444 return true;
6445 }
6446
6447 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
6448 TRANS(UADDW, do_addsub_wide, a, 0, false)
6449 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
6450 TRANS(USUBW, do_addsub_wide, a, 0, true)
6451
6452 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
6453 bool sub, bool round)
6454 {
6455 TCGv_i64 tcg_op0, tcg_op1;
6456 MemOp esz = a->esz;
6457 int half = 8 >> esz;
6458 bool top = a->q;
6459 int ebits = 8 << esz;
6460 uint64_t rbit = 1ull << (ebits - 1);
6461 int top_swap, top_half;
6462
6463 /* There are no 128x128->64 bit operations. */
6464 if (esz >= MO_64) {
6465 return false;
6466 }
6467 if (!fp_access_check(s)) {
6468 return true;
6469 }
6470 tcg_op0 = tcg_temp_new_i64();
6471 tcg_op1 = tcg_temp_new_i64();
6472
6473 /*
6474 * For top half inputs, iterate backward; forward for bottom half.
6475 * This means the store to the destination will not occur until
6476 * overlapping inputs are consumed.
6477 */
6478 top_swap = top ? half - 1 : 0;
6479 top_half = top ? half : 0;
6480
6481 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6482 int elt = elt_fwd ^ top_swap;
6483
6484 read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
6485 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6486 if (sub) {
6487 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6488 } else {
6489 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6490 }
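        /*
         * For the rounding forms (RADDHN/RSUBHN), add half of the weight
         * of the discarded low half (1 << (ebits - 1)) before shifting.
         */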
6491 if (round) {
6492 tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
6493 }
6494 tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
6495 write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
6496 }
6497 clear_vec_high(s, top, a->rd);
6498 return true;
6499 }
6500
6501 TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
6502 TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
6503 TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
6504 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
6505
6506 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
6507 {
6508 if (fp_access_check(s)) {
6509 /* The Q field specifies lo/hi half input for these insns. */
6510 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
6511 }
6512 return true;
6513 }
6514
6515 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
6516 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
6517
6518 /*
6519 * Advanced SIMD scalar/vector x indexed element
6520 */
6521
6522 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
6523 {
6524 switch (a->esz) {
6525 case MO_64:
6526 if (fp_access_check(s)) {
6527 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
6528 TCGv_i64 t1 = tcg_temp_new_i64();
6529
6530 read_vec_element(s, t1, a->rm, a->idx, MO_64);
6531 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6532 write_fp_dreg_merging(s, a->rd, a->rn, t0);
6533 }
6534 break;
6535 case MO_32:
6536 if (fp_access_check(s)) {
6537 TCGv_i32 t0 = read_fp_sreg(s, a->rn);
6538 TCGv_i32 t1 = tcg_temp_new_i32();
6539
6540 read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
6541 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6542 write_fp_sreg_merging(s, a->rd, a->rn, t0);
6543 }
6544 break;
6545 case MO_16:
6546 if (!dc_isar_feature(aa64_fp16, s)) {
6547 return false;
6548 }
6549 if (fp_access_check(s)) {
6550 TCGv_i32 t0 = read_fp_hreg(s, a->rn);
6551 TCGv_i32 t1 = tcg_temp_new_i32();
6552
6553 read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
6554 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6555 write_fp_hreg_merging(s, a->rd, a->rn, t0);
6556 }
6557 break;
6558 default:
6559 g_assert_not_reached();
6560 }
6561 return true;
6562 }
6563
6564 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
6565 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
6566
6567 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
6568 {
6569 switch (a->esz) {
6570 case MO_64:
6571 if (fp_access_check(s)) {
6572 TCGv_i64 t0 = read_fp_dreg(s, a->rd);
6573 TCGv_i64 t1 = read_fp_dreg(s, a->rn);
6574 TCGv_i64 t2 = tcg_temp_new_i64();
6575
6576 read_vec_element(s, t2, a->rm, a->idx, MO_64);
6577 if (neg) {
6578 gen_vfp_maybe_ah_negd(s, t1, t1);
6579 }
6580 gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6581 write_fp_dreg_merging(s, a->rd, a->rd, t0);
6582 }
6583 break;
6584 case MO_32:
6585 if (fp_access_check(s)) {
6586 TCGv_i32 t0 = read_fp_sreg(s, a->rd);
6587 TCGv_i32 t1 = read_fp_sreg(s, a->rn);
6588 TCGv_i32 t2 = tcg_temp_new_i32();
6589
6590 read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
6591 if (neg) {
6592 gen_vfp_maybe_ah_negs(s, t1, t1);
6593 }
6594 gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6595 write_fp_sreg_merging(s, a->rd, a->rd, t0);
6596 }
6597 break;
6598 case MO_16:
6599 if (!dc_isar_feature(aa64_fp16, s)) {
6600 return false;
6601 }
6602 if (fp_access_check(s)) {
6603 TCGv_i32 t0 = read_fp_hreg(s, a->rd);
6604 TCGv_i32 t1 = read_fp_hreg(s, a->rn);
6605 TCGv_i32 t2 = tcg_temp_new_i32();
6606
6607 read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
6608 if (neg) {
6609 gen_vfp_maybe_ah_negh(s, t1, t1);
6610 }
6611 gen_helper_advsimd_muladdh(t0, t1, t2, t0,
6612 fpstatus_ptr(FPST_A64_F16));
6613 write_fp_hreg_merging(s, a->rd, a->rd, t0);
6614 }
6615 break;
6616 default:
6617 g_assert_not_reached();
6618 }
6619 return true;
6620 }
6621
6622 TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
6623 TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
6624
6625 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
6626 const ENVScalar2 *f)
6627 {
6628 if (a->esz < MO_16 || a->esz > MO_32) {
6629 return false;
6630 }
6631 if (fp_access_check(s)) {
6632 TCGv_i32 t0 = tcg_temp_new_i32();
6633 TCGv_i32 t1 = tcg_temp_new_i32();
6634
6635 read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6636 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6637 f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
6638 write_fp_sreg(s, a->rd, t0);
6639 }
6640 return true;
6641 }
6642
6643 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
6644 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
6645
6646 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
6647 const ENVScalar3 *f)
6648 {
6649 if (a->esz < MO_16 || a->esz > MO_32) {
6650 return false;
6651 }
6652 if (fp_access_check(s)) {
6653 TCGv_i32 t0 = tcg_temp_new_i32();
6654 TCGv_i32 t1 = tcg_temp_new_i32();
6655 TCGv_i32 t2 = tcg_temp_new_i32();
6656
6657 read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6658 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6659 read_vec_element_i32(s, t2, a->rd, 0, a->esz);
6660 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
6661 write_fp_sreg(s, a->rd, t0);
6662 }
6663 return true;
6664 }
6665
6666 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
6667 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
6668
6669 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
6670 NeonGenTwo64OpFn *fn, bool acc)
6671 {
6672 if (fp_access_check(s)) {
6673 TCGv_i64 t0 = tcg_temp_new_i64();
6674 TCGv_i64 t1 = tcg_temp_new_i64();
6675 TCGv_i64 t2 = tcg_temp_new_i64();
6676
6677 if (acc) {
6678 read_vec_element(s, t0, a->rd, 0, a->esz + 1);
6679 }
6680 read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
6681 read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
6682 fn(t0, t1, t2);
6683
6684 /* Clear the whole register first, then store scalar. */
6685 clear_vec(s, a->rd);
6686 write_vec_element(s, t0, a->rd, 0, a->esz + 1);
6687 }
6688 return true;
6689 }
6690
6691 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
6692 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6693 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
6694 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6695 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
6696 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6697
6698 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6699 gen_helper_gvec_3_ptr * const fns[3])
6700 {
6701 MemOp esz = a->esz;
6702 int check = fp_access_check_vector_hsd(s, a->q, esz);
6703
6704 if (check <= 0) {
6705 return check == 0;
6706 }
6707
6708 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
6709 esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6710 a->idx, fns[esz - 1]);
6711 return true;
6712 }
6713
6714 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
6715 gen_helper_gvec_fmul_idx_h,
6716 gen_helper_gvec_fmul_idx_s,
6717 gen_helper_gvec_fmul_idx_d,
6718 };
6719 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
6720
6721 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
6722 gen_helper_gvec_fmulx_idx_h,
6723 gen_helper_gvec_fmulx_idx_s,
6724 gen_helper_gvec_fmulx_idx_d,
6725 };
6726 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
6727
6728 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
6729 {
6730 static gen_helper_gvec_4_ptr * const fns[3][3] = {
6731 { gen_helper_gvec_fmla_idx_h,
6732 gen_helper_gvec_fmla_idx_s,
6733 gen_helper_gvec_fmla_idx_d },
6734 { gen_helper_gvec_fmls_idx_h,
6735 gen_helper_gvec_fmls_idx_s,
6736 gen_helper_gvec_fmls_idx_d },
6737 { gen_helper_gvec_ah_fmls_idx_h,
6738 gen_helper_gvec_ah_fmls_idx_s,
6739 gen_helper_gvec_ah_fmls_idx_d },
6740 };
6741 MemOp esz = a->esz;
6742 int check = fp_access_check_vector_hsd(s, a->q, esz);
6743
6744 if (check <= 0) {
6745 return check == 0;
6746 }
6747
6748 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6749 esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6750 a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]);
6751 return true;
6752 }
6753
6754 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
6755 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
6756
6757 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
6758 {
6759 if (fp_access_check(s)) {
6760 int data = (a->idx << 2) | (is_2 << 1) | is_s;
6761 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
6762 vec_full_reg_offset(s, a->rn),
6763 vec_full_reg_offset(s, a->rm), tcg_env,
6764 a->q ? 16 : 8, vec_full_reg_size(s),
6765 data, gen_helper_gvec_fmlal_idx_a64);
6766 }
6767 return true;
6768 }
6769
6770 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
6771 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
6772 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
6773 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
6774
6775 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6776 gen_helper_gvec_3 * const fns[2])
6777 {
6778 assert(a->esz == MO_16 || a->esz == MO_32);
6779 if (fp_access_check(s)) {
6780 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]);
6781 }
6782 return true;
6783 }
6784
6785 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = {
6786 gen_helper_gvec_mul_idx_h,
6787 gen_helper_gvec_mul_idx_s,
6788 };
6789 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul)
6790
6791 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub)
6792 {
6793 static gen_helper_gvec_4 * const fns[2][2] = {
6794 { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h },
6795 { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s },
6796 };
6797
6798 assert(a->esz == MO_16 || a->esz == MO_32);
6799 if (fp_access_check(s)) {
6800 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd,
6801 a->idx, fns[a->esz - 1][sub]);
6802 }
6803 return true;
6804 }
6805
6806 TRANS(MLA_vi, do_mla_vector_idx, a, false)
6807 TRANS(MLS_vi, do_mla_vector_idx, a, true)
6808
6809 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a,
6810 gen_helper_gvec_4 * const fns[2])
6811 {
6812 assert(a->esz == MO_16 || a->esz == MO_32);
6813 if (fp_access_check(s)) {
6814 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
6815 vec_full_reg_offset(s, a->rn),
6816 vec_full_reg_offset(s, a->rm),
6817 offsetof(CPUARMState, vfp.qc),
6818 a->q ? 16 : 8, vec_full_reg_size(s),
6819 a->idx, fns[a->esz - 1]);
6820 }
6821 return true;
6822 }
6823
6824 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = {
6825 gen_helper_neon_sqdmulh_idx_h,
6826 gen_helper_neon_sqdmulh_idx_s,
6827 };
6828 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh)
6829
6830 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
6831 gen_helper_neon_sqrdmulh_idx_h,
6832 gen_helper_neon_sqrdmulh_idx_s,
6833 };
6834 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
6835
6836 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = {
6837 gen_helper_neon_sqrdmlah_idx_h,
6838 gen_helper_neon_sqrdmlah_idx_s,
6839 };
6840 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6841 f_vector_idx_sqrdmlah)
6842
6843 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = {
6844 gen_helper_neon_sqrdmlsh_idx_h,
6845 gen_helper_neon_sqrdmlsh_idx_s,
6846 };
6847 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6848 f_vector_idx_sqrdmlsh)
6849
6850 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
6851 gen_helper_gvec_4 *fn)
6852 {
6853 if (fp_access_check(s)) {
6854 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6855 }
6856 return true;
6857 }
6858
6859 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
6860 gen_helper_gvec_4_ptr *fn)
6861 {
6862 if (fp_access_check(s)) {
6863 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6864 }
6865 return true;
6866 }
6867
6868 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
6869 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
6870 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6871 gen_helper_gvec_sudot_idx_b)
6872 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6873 gen_helper_gvec_usdot_idx_b)
6874 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
6875 gen_helper_gvec_bfdot_idx)
6876
6877 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
6878 {
6879 if (!dc_isar_feature(aa64_bf16, s)) {
6880 return false;
6881 }
6882 if (fp_access_check(s)) {
6883 /* Q bit selects BFMLALB vs BFMLALT. */
6884 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
6885 s->fpcr_ah ? FPST_AH : FPST_A64,
6886 (a->idx << 1) | a->q,
6887 gen_helper_gvec_bfmlal_idx);
6888 }
6889 return true;
6890 }
6891
6892 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
6893 {
6894 gen_helper_gvec_4_ptr *fn;
6895
6896 if (!dc_isar_feature(aa64_fcma, s)) {
6897 return false;
6898 }
6899 switch (a->esz) {
6900 case MO_16:
6901 if (!dc_isar_feature(aa64_fp16, s)) {
6902 return false;
6903 }
6904 fn = gen_helper_gvec_fcmlah_idx;
6905 break;
6906 case MO_32:
6907 fn = gen_helper_gvec_fcmlas_idx;
6908 break;
6909 default:
6910 g_assert_not_reached();
6911 }
6912 if (fp_access_check(s)) {
6913 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6914 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6915 (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn);
6916 }
6917 return true;
6918 }
6919
6920 /*
6921 * Advanced SIMD scalar pairwise
6922 */
6923
6924 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
6925 {
6926 switch (a->esz) {
6927 case MO_64:
6928 if (fp_access_check(s)) {
6929 TCGv_i64 t0 = tcg_temp_new_i64();
6930 TCGv_i64 t1 = tcg_temp_new_i64();
6931
6932 read_vec_element(s, t0, a->rn, 0, MO_64);
6933 read_vec_element(s, t1, a->rn, 1, MO_64);
6934 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6935 write_fp_dreg(s, a->rd, t0);
6936 }
6937 break;
6938 case MO_32:
6939 if (fp_access_check(s)) {
6940 TCGv_i32 t0 = tcg_temp_new_i32();
6941 TCGv_i32 t1 = tcg_temp_new_i32();
6942
6943 read_vec_element_i32(s, t0, a->rn, 0, MO_32);
6944 read_vec_element_i32(s, t1, a->rn, 1, MO_32);
6945 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6946 write_fp_sreg(s, a->rd, t0);
6947 }
6948 break;
6949 case MO_16:
6950 if (!dc_isar_feature(aa64_fp16, s)) {
6951 return false;
6952 }
6953 if (fp_access_check(s)) {
6954 TCGv_i32 t0 = tcg_temp_new_i32();
6955 TCGv_i32 t1 = tcg_temp_new_i32();
6956
6957 read_vec_element_i32(s, t0, a->rn, 0, MO_16);
6958 read_vec_element_i32(s, t1, a->rn, 1, MO_16);
6959 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6960 write_fp_sreg(s, a->rd, t0);
6961 }
6962 break;
6963 default:
6964 g_assert_not_reached();
6965 }
6966 return true;
6967 }
6968
6969 static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a,
6970 const FPScalar *fnormal,
6971 const FPScalar *fah)
6972 {
6973 return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal);
6974 }
6975
6976 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
6977 TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah)
6978 TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah)
6979 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
6980 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
6981
6982 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
6983 {
6984 if (fp_access_check(s)) {
6985 TCGv_i64 t0 = tcg_temp_new_i64();
6986 TCGv_i64 t1 = tcg_temp_new_i64();
6987
6988 read_vec_element(s, t0, a->rn, 0, MO_64);
6989 read_vec_element(s, t1, a->rn, 1, MO_64);
6990 tcg_gen_add_i64(t0, t0, t1);
6991 write_fp_dreg(s, a->rd, t0);
6992 }
6993 return true;
6994 }
6995
6996 /*
6997 * Floating-point conditional select
6998 */
6999
7000 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
7001 {
7002 TCGv_i64 t_true, t_false;
7003 DisasCompare64 c;
7004 int check = fp_access_check_scalar_hsd(s, a->esz);
7005
7006 if (check <= 0) {
7007 return check == 0;
7008 }
7009
7010 /* Zero extend sreg & hreg inputs to 64 bits now. */
7011 t_true = tcg_temp_new_i64();
7012 t_false = tcg_temp_new_i64();
7013 read_vec_element(s, t_true, a->rn, 0, a->esz);
7014 read_vec_element(s, t_false, a->rm, 0, a->esz);
7015
7016 a64_test_cc(&c, a->cond);
7017 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
7018 t_true, t_false);
7019
7020 /*
7021 * Note that sregs & hregs write back zeros to the high bits,
7022 * and we've already done the zero-extension.
7023 */
7024 write_fp_dreg(s, a->rd, t_true);
7025 return true;
7026 }
7027
7028 /*
7029 * Advanced SIMD Extract
7030 */
7031
7032 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a)
7033 {
7034 if (fp_access_check(s)) {
7035 TCGv_i64 lo = read_fp_dreg(s, a->rn);
7036 if (a->imm != 0) {
7037 TCGv_i64 hi = read_fp_dreg(s, a->rm);
7038 tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8);
7039 }
7040 write_fp_dreg(s, a->rd, lo);
7041 }
7042 return true;
7043 }
7044
7045 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a)
7046 {
7047 TCGv_i64 lo, hi;
7048 int pos = (a->imm & 7) * 8;
7049 int elt = a->imm >> 3;
7050
7051 if (!fp_access_check(s)) {
7052 return true;
7053 }
7054
7055 lo = tcg_temp_new_i64();
7056 hi = tcg_temp_new_i64();
7057
7058 read_vec_element(s, lo, a->rn, elt, MO_64);
7059 elt++;
7060 read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64);
7061 elt++;
7062
7063 if (pos != 0) {
7064 TCGv_i64 hh = tcg_temp_new_i64();
7065 tcg_gen_extract2_i64(lo, lo, hi, pos);
7066 read_vec_element(s, hh, a->rm, elt & 1, MO_64);
7067 tcg_gen_extract2_i64(hi, hi, hh, pos);
7068 }
7069
7070 write_vec_element(s, lo, a->rd, 0, MO_64);
7071 write_vec_element(s, hi, a->rd, 1, MO_64);
7072 clear_vec_high(s, true, a->rd);
7073 return true;
7074 }
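/*
 * As an illustration: EXT V0.16B, V1.16B, V2.16B, #3 yields bytes 3..15
 * of V1 followed by bytes 0..2 of V2, i.e. a byte-granular window into
 * the concatenation V2:V1 starting at the immediate offset.
 */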
7075
7076 /*
7077 * Floating-point data-processing (3 source)
7078 */
7079
7080 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
7081 {
7082 TCGv_ptr fpst;
7083
7084 /*
7085 * These are fused multiply-add. Note that doing the negations here
7086 * as separate steps is correct: an input NaN should come out with
7087 * its sign bit flipped if it is a negated-input.
7088 */
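/*
 * For reference, the four users of this helper map to (roughly):
 *   FMADD:  rd =  ra + rn * rm   (neg_a = false, neg_n = false)
 *   FMSUB:  rd =  ra - rn * rm   (neg_a = false, neg_n = true)
 *   FNMADD: rd = -ra - rn * rm   (neg_a = true,  neg_n = true)
 *   FNMSUB: rd = -ra + rn * rm   (neg_a = true,  neg_n = false)
 */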
7089 switch (a->esz) {
7090 case MO_64:
7091 if (fp_access_check(s)) {
7092 TCGv_i64 tn = read_fp_dreg(s, a->rn);
7093 TCGv_i64 tm = read_fp_dreg(s, a->rm);
7094 TCGv_i64 ta = read_fp_dreg(s, a->ra);
7095
7096 if (neg_a) {
7097 gen_vfp_maybe_ah_negd(s, ta, ta);
7098 }
7099 if (neg_n) {
7100 gen_vfp_maybe_ah_negd(s, tn, tn);
7101 }
7102 fpst = fpstatus_ptr(FPST_A64);
7103 gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
7104 write_fp_dreg_merging(s, a->rd, a->ra, ta);
7105 }
7106 break;
7107
7108 case MO_32:
7109 if (fp_access_check(s)) {
7110 TCGv_i32 tn = read_fp_sreg(s, a->rn);
7111 TCGv_i32 tm = read_fp_sreg(s, a->rm);
7112 TCGv_i32 ta = read_fp_sreg(s, a->ra);
7113
7114 if (neg_a) {
7115 gen_vfp_maybe_ah_negs(s, ta, ta);
7116 }
7117 if (neg_n) {
7118 gen_vfp_maybe_ah_negs(s, tn, tn);
7119 }
7120 fpst = fpstatus_ptr(FPST_A64);
7121 gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
7122 write_fp_sreg_merging(s, a->rd, a->ra, ta);
7123 }
7124 break;
7125
7126 case MO_16:
7127 if (!dc_isar_feature(aa64_fp16, s)) {
7128 return false;
7129 }
7130 if (fp_access_check(s)) {
7131 TCGv_i32 tn = read_fp_hreg(s, a->rn);
7132 TCGv_i32 tm = read_fp_hreg(s, a->rm);
7133 TCGv_i32 ta = read_fp_hreg(s, a->ra);
7134
7135 if (neg_a) {
7136 gen_vfp_maybe_ah_negh(s, ta, ta);
7137 }
7138 if (neg_n) {
7139 gen_vfp_maybe_ah_negh(s, tn, tn);
7140 }
7141 fpst = fpstatus_ptr(FPST_A64_F16);
7142 gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
7143 write_fp_hreg_merging(s, a->rd, a->ra, ta);
7144 }
7145 break;
7146
7147 default:
7148 return false;
7149 }
7150 return true;
7151 }
7152
7153 TRANS(FMADD, do_fmadd, a, false, false)
7154 TRANS(FNMADD, do_fmadd, a, true, true)
7155 TRANS(FMSUB, do_fmadd, a, false, true)
7156 TRANS(FNMSUB, do_fmadd, a, true, false)
7157
7158 /*
7159 * Advanced SIMD Across Lanes
7160 */
7161
7162 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
7163 MemOp src_sign, NeonGenTwo64OpFn *fn)
7164 {
7165 TCGv_i64 tcg_res, tcg_elt;
7166 MemOp src_mop = a->esz | src_sign;
7167 int elements = (a->q ? 16 : 8) >> a->esz;
7168
7169 /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
7170 if (elements < 4) {
7171 return false;
7172 }
7173 if (!fp_access_check(s)) {
7174 return true;
7175 }
7176
7177 tcg_res = tcg_temp_new_i64();
7178 tcg_elt = tcg_temp_new_i64();
7179
7180 read_vec_element(s, tcg_res, a->rn, 0, src_mop);
7181 for (int i = 1; i < elements; i++) {
7182 read_vec_element(s, tcg_elt, a->rn, i, src_mop);
7183 fn(tcg_res, tcg_res, tcg_elt);
7184 }
7185
7186 tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
7187 write_fp_dreg(s, a->rd, tcg_res);
7188 return true;
7189 }
7190
7191 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
7192 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
7193 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
7194 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
7195 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
7196 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
7197 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
7198
7199 /*
7200 * do_fp_reduction helper
7201 *
7202 * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7203 * important for correct NaN propagation that we do these
7204 * operations in exactly the order specified by the pseudocode.
7205 *
7206 * This is a recursive function.
7207 */
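/*
 * As a sketch of the resulting operand order: for ecount == 4 starting at
 * ebase == 0 this expands to fn(fn(elt[0], elt[1]), fn(elt[2], elt[3])),
 * i.e. each half is reduced first and the two partial results combined last.
 */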
7208 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
7209 int ebase, int ecount, TCGv_ptr fpst,
7210 NeonGenTwoSingleOpFn *fn)
7211 {
7212 if (ecount == 1) {
7213 TCGv_i32 tcg_elem = tcg_temp_new_i32();
7214 read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
7215 return tcg_elem;
7216 } else {
7217 int half = ecount >> 1;
7218 TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7219
7220 tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
7221 tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
7222 tcg_res = tcg_temp_new_i32();
7223
7224 fn(tcg_res, tcg_lo, tcg_hi, fpst);
7225 return tcg_res;
7226 }
7227 }
7228
7229 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
7230 NeonGenTwoSingleOpFn *fnormal,
7231 NeonGenTwoSingleOpFn *fah)
7232 {
7233 if (fp_access_check(s)) {
7234 MemOp esz = a->esz;
7235 int elts = (a->q ? 16 : 8) >> esz;
7236 TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
7237 TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst,
7238 s->fpcr_ah ? fah : fnormal);
7239 write_fp_sreg(s, a->rd, res);
7240 }
7241 return true;
7242 }
7243
7244 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a,
7245 gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh)
7246 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a,
7247 gen_helper_vfp_minnumh, gen_helper_vfp_minnumh)
7248 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a,
7249 gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh)
7250 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a,
7251 gen_helper_vfp_minh, gen_helper_vfp_ah_minh)
7252
7253 TRANS(FMAXNMV_s, do_fp_reduction, a,
7254 gen_helper_vfp_maxnums, gen_helper_vfp_maxnums)
7255 TRANS(FMINNMV_s, do_fp_reduction, a,
7256 gen_helper_vfp_minnums, gen_helper_vfp_minnums)
7257 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs)
7258 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins)
7259
7260 /*
7261 * Floating-point Immediate
7262 */
7263
7264 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a)
7265 {
7266 int check = fp_access_check_scalar_hsd(s, a->esz);
7267 uint64_t imm;
7268
7269 if (check <= 0) {
7270 return check == 0;
7271 }
7272
7273 imm = vfp_expand_imm(a->esz, a->imm);
7274 write_fp_dreg(s, a->rd, tcg_constant_i64(imm));
7275 return true;
7276 }
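/*
 * Worked example (assuming the usual VFPExpandImm encoding): for
 * FMOV D0, #1.0 the abcdefgh field is 0x70, which expands to the
 * double-precision constant 0x3ff0000000000000.
 */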
7277
7278 /*
7279 * Floating point compare, conditional compare
7280 */
7281
7282 static void handle_fp_compare(DisasContext *s, int size,
7283 unsigned int rn, unsigned int rm,
7284 bool cmp_with_zero, bool signal_all_nans)
7285 {
7286 TCGv_i64 tcg_flags = tcg_temp_new_i64();
7287 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64);
7288
7289 if (size == MO_64) {
7290 TCGv_i64 tcg_vn, tcg_vm;
7291
7292 tcg_vn = read_fp_dreg(s, rn);
7293 if (cmp_with_zero) {
7294 tcg_vm = tcg_constant_i64(0);
7295 } else {
7296 tcg_vm = read_fp_dreg(s, rm);
7297 }
7298 if (signal_all_nans) {
7299 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7300 } else {
7301 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7302 }
7303 } else {
7304 TCGv_i32 tcg_vn = tcg_temp_new_i32();
7305 TCGv_i32 tcg_vm = tcg_temp_new_i32();
7306
7307 read_vec_element_i32(s, tcg_vn, rn, 0, size);
7308 if (cmp_with_zero) {
7309 tcg_gen_movi_i32(tcg_vm, 0);
7310 } else {
7311 read_vec_element_i32(s, tcg_vm, rm, 0, size);
7312 }
7313
7314 switch (size) {
7315 case MO_32:
7316 if (signal_all_nans) {
7317 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7318 } else {
7319 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7320 }
7321 break;
7322 case MO_16:
7323 if (signal_all_nans) {
7324 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7325 } else {
7326 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7327 }
7328 break;
7329 default:
7330 g_assert_not_reached();
7331 }
7332 }
7333
7334 gen_set_nzcv(tcg_flags);
7335 }
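/*
 * The compare helpers above already return the architectural NZCV result
 * for FP compares (roughly: equal 0110, less than 1000, greater than 0010,
 * unordered 0011), so gen_set_nzcv() can copy it straight into the flags.
 */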
7336
7337 /* FCMP, FCMPE */
7338 static bool trans_FCMP(DisasContext *s, arg_FCMP *a)
7339 {
7340 int check = fp_access_check_scalar_hsd(s, a->esz);
7341
7342 if (check <= 0) {
7343 return check == 0;
7344 }
7345
7346 handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e);
7347 return true;
7348 }
7349
7350 /* FCCMP, FCCMPE */
7351 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a)
7352 {
7353 TCGLabel *label_continue = NULL;
7354 int check = fp_access_check_scalar_hsd(s, a->esz);
7355
7356 if (check <= 0) {
7357 return check == 0;
7358 }
7359
7360 if (a->cond < 0x0e) { /* not always */
7361 TCGLabel *label_match = gen_new_label();
7362 label_continue = gen_new_label();
7363 arm_gen_test_cc(a->cond, label_match);
7364 /* nomatch: */
7365 gen_set_nzcv(tcg_constant_i64(a->nzcv << 28));
7366 tcg_gen_br(label_continue);
7367 gen_set_label(label_match);
7368 }
7369
7370 handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e);
7371
7372 if (label_continue) {
7373 gen_set_label(label_continue);
7374 }
7375 return true;
7376 }
7377
7378 /*
7379 * Advanced SIMD Modified Immediate
7380 */
7381
7382 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a)
7383 {
7384 if (!dc_isar_feature(aa64_fp16, s)) {
7385 return false;
7386 }
7387 if (fp_access_check(s)) {
7388 tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd),
7389 a->q ? 16 : 8, vec_full_reg_size(s),
7390 vfp_expand_imm(MO_16, a->abcdefgh));
7391 }
7392 return true;
7393 }
7394
7395 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs,
7396 int64_t c, uint32_t oprsz, uint32_t maxsz)
7397 {
7398 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
7399 }
7400
7401 static bool trans_Vimm(DisasContext *s, arg_Vimm *a)
7402 {
7403 GVecGen2iFn *fn;
7404
7405 /* Handle decode of cmode/op here between ORR/BIC/MOVI */
7406 if ((a->cmode & 1) && a->cmode < 12) {
7407 /* For op=1, the imm will be inverted, so BIC becomes AND. */
7408 fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
7409 } else {
7410 /* There is one unallocated cmode/op combination in this space */
7411 if (a->cmode == 15 && a->op == 1 && a->q == 0) {
7412 return false;
7413 }
7414 fn = gen_movi;
7415 }
7416
7417 if (fp_access_check(s)) {
7418 uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
7419 gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
7420 }
7421 return true;
7422 }
7423
7424 /*
7425 * Advanced SIMD Shift by Immediate
7426 */
7427
7428 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
7429 {
7430 if (fp_access_check(s)) {
7431 gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
7432 }
7433 return true;
7434 }
7435
7436 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
7437 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
7438 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
7439 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
7440 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
7441 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
7442 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
7443 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
7444 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
7445 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
7446 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli)
7447 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
7448 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
7449 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)
7450
7451 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
7452 {
7453 TCGv_i64 tcg_rn, tcg_rd;
7454 int esz = a->esz;
7455 int esize;
7456
7457 if (!fp_access_check(s)) {
7458 return true;
7459 }
7460
7461 /*
7462 * For the LL variants the store is larger than the load,
7463 * so if rd == rn we would overwrite parts of our input.
7464 * So load everything right now and use shifts in the main loop.
7465 */
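/*
 * Concretely: for SSHLL v0.8h, v0.8b, #sh, writing widened element 0
 * would already overwrite source byte 1 before it has been read.
 */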
7466 tcg_rd = tcg_temp_new_i64();
7467 tcg_rn = tcg_temp_new_i64();
7468 read_vec_element(s, tcg_rn, a->rn, a->q, MO_64);
7469
7470 esize = 8 << esz;
7471 for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7472 if (is_u) {
7473 tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize);
7474 } else {
7475 tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize);
7476 }
7477 tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm);
7478 write_vec_element(s, tcg_rd, a->rd, i, esz + 1);
7479 }
7480 clear_vec_high(s, true, a->rd);
7481 return true;
7482 }
7483
7484 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false)
7485 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true)
7486
7487 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7488 {
7489 assert(shift >= 0 && shift <= 64);
7490 tcg_gen_sari_i64(dst, src, MIN(shift, 63));
7491 }
7492
7493 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7494 {
7495 assert(shift >= 0 && shift <= 64);
7496 if (shift == 64) {
7497 tcg_gen_movi_i64(dst, 0);
7498 } else {
7499 tcg_gen_shri_i64(dst, src, shift);
7500 }
7501 }
7502
7503 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7504 {
7505 gen_sshr_d(src, src, shift);
7506 tcg_gen_add_i64(dst, dst, src);
7507 }
7508
7509 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7510 {
7511 gen_ushr_d(src, src, shift);
7512 tcg_gen_add_i64(dst, dst, src);
7513 }
7514
7515 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7516 {
7517 assert(shift >= 0 && shift <= 32);
7518 if (shift) {
7519 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7520 tcg_gen_add_i64(dst, src, rnd);
7521 tcg_gen_sari_i64(dst, dst, shift);
7522 } else {
7523 tcg_gen_mov_i64(dst, src);
7524 }
7525 }
7526
7527 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7528 {
7529 assert(shift >= 0 && shift <= 32);
7530 if (shift) {
7531 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7532 tcg_gen_add_i64(dst, src, rnd);
7533 tcg_gen_shri_i64(dst, dst, shift);
7534 } else {
7535 tcg_gen_mov_i64(dst, src);
7536 }
7537 }
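/*
 * Both helpers above compute (src + (1 << (shift - 1))) >> shift, i.e. a
 * rounding shift right; e.g. shift == 2, src == 7 gives (7 + 2) >> 2 == 2.
 * The 64-bit variants below add the extracted rounding bit after shifting
 * instead, so the intermediate sum cannot overflow.
 */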
7538
7539 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7540 {
7541 assert(shift >= 0 && shift <= 64);
7542 if (shift == 0) {
7543 tcg_gen_mov_i64(dst, src);
7544 } else if (shift == 64) {
7545 /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */
7546 tcg_gen_movi_i64(dst, 0);
7547 } else {
7548 TCGv_i64 rnd = tcg_temp_new_i64();
7549 tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7550 tcg_gen_sari_i64(dst, src, shift);
7551 tcg_gen_add_i64(dst, dst, rnd);
7552 }
7553 }
7554
7555 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7556 {
7557 assert(shift >= 0 && shift <= 64);
7558 if (shift == 0) {
7559 tcg_gen_mov_i64(dst, src);
7560 } else if (shift == 64) {
7561 /* Rounding will propagate bit 63 into bit 64. */
7562 tcg_gen_shri_i64(dst, src, 63);
7563 } else {
7564 TCGv_i64 rnd = tcg_temp_new_i64();
7565 tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7566 tcg_gen_shri_i64(dst, src, shift);
7567 tcg_gen_add_i64(dst, dst, rnd);
7568 }
7569 }
7570
7571 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7572 {
7573 gen_srshr_d(src, src, shift);
7574 tcg_gen_add_i64(dst, dst, src);
7575 }
7576
7577 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7578 {
7579 gen_urshr_d(src, src, shift);
7580 tcg_gen_add_i64(dst, dst, src);
7581 }
7582
7583 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7584 {
7585 /* If shift is 64, dst is unchanged. */
7586 if (shift != 64) {
7587 tcg_gen_shri_i64(src, src, shift);
7588 tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift);
7589 }
7590 }
7591
7592 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7593 {
7594 tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift);
7595 }
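/*
 * Illustration: for SLI with shift == 8 the deposit keeps dst[7:0] and
 * replaces dst[63:8] with src[55:0]; SRI above is the mirror image and
 * keeps the top 'shift' bits of dst.
 */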
7596
7597 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a,
7598 WideShiftImmFn * const fns[3], MemOp sign)
7599 {
7600 TCGv_i64 tcg_rn, tcg_rd;
7601 int esz = a->esz;
7602 int esize;
7603 WideShiftImmFn *fn;
7604
7605 tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7606
7607 if (!fp_access_check(s)) {
7608 return true;
7609 }
7610
7611 tcg_rn = tcg_temp_new_i64();
7612 tcg_rd = tcg_temp_new_i64();
7613 tcg_gen_movi_i64(tcg_rd, 0);
7614
7615 fn = fns[esz];
7616 esize = 8 << esz;
7617 for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7618 read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign);
7619 fn(tcg_rn, tcg_rn, a->imm);
7620 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize);
7621 }
7622
7623 write_vec_element(s, tcg_rd, a->rd, a->q, MO_64);
7624 clear_vec_high(s, a->q, a->rd);
7625 return true;
7626 }
7627
7628 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7629 {
7630 tcg_gen_sari_i64(d, s, i);
7631 tcg_gen_ext16u_i64(d, d);
7632 gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7633 }
7634
7635 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7636 {
7637 tcg_gen_sari_i64(d, s, i);
7638 tcg_gen_ext32u_i64(d, d);
7639 gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7640 }
7641
7642 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7643 {
7644 gen_sshr_d(d, s, i);
7645 gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7646 }
7647
7648 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7649 {
7650 tcg_gen_shri_i64(d, s, i);
7651 gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7652 }
7653
7654 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7655 {
7656 tcg_gen_shri_i64(d, s, i);
7657 gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7658 }
7659
7660 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7661 {
7662 gen_ushr_d(d, s, i);
7663 gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7664 }
7665
7666 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7667 {
7668 tcg_gen_sari_i64(d, s, i);
7669 tcg_gen_ext16u_i64(d, d);
7670 gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7671 }
7672
7673 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7674 {
7675 tcg_gen_sari_i64(d, s, i);
7676 tcg_gen_ext32u_i64(d, d);
7677 gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7678 }
7679
7680 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7681 {
7682 gen_sshr_d(d, s, i);
7683 gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7684 }
7685
7686 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7687 {
7688 gen_srshr_bhs(d, s, i);
7689 tcg_gen_ext16u_i64(d, d);
7690 gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7691 }
7692
7693 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7694 {
7695 gen_srshr_bhs(d, s, i);
7696 tcg_gen_ext32u_i64(d, d);
7697 gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7698 }
7699
7700 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7701 {
7702 gen_srshr_d(d, s, i);
7703 gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7704 }
7705
7706 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7707 {
7708 gen_urshr_bhs(d, s, i);
7709 gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7710 }
7711
7712 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7713 {
7714 gen_urshr_bhs(d, s, i);
7715 gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7716 }
7717
7718 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7719 {
7720 gen_urshr_d(d, s, i);
7721 gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7722 }
7723
7724 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7725 {
7726 gen_srshr_bhs(d, s, i);
7727 tcg_gen_ext16u_i64(d, d);
7728 gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7729 }
7730
7731 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7732 {
7733 gen_srshr_bhs(d, s, i);
7734 tcg_gen_ext32u_i64(d, d);
7735 gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7736 }
7737
7738 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7739 {
7740 gen_srshr_d(d, s, i);
7741 gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7742 }
7743
7744 static WideShiftImmFn * const shrn_fns[] = {
7745 tcg_gen_shri_i64,
7746 tcg_gen_shri_i64,
7747 gen_ushr_d,
7748 };
7749 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0)
7750
7751 static WideShiftImmFn * const rshrn_fns[] = {
7752 gen_urshr_bhs,
7753 gen_urshr_bhs,
7754 gen_urshr_d,
7755 };
7756 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0)
7757
7758 static WideShiftImmFn * const sqshrn_fns[] = {
7759 gen_sqshrn_b,
7760 gen_sqshrn_h,
7761 gen_sqshrn_s,
7762 };
7763 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN)
7764
7765 static WideShiftImmFn * const uqshrn_fns[] = {
7766 gen_uqshrn_b,
7767 gen_uqshrn_h,
7768 gen_uqshrn_s,
7769 };
7770 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0)
7771
7772 static WideShiftImmFn * const sqshrun_fns[] = {
7773 gen_sqshrun_b,
7774 gen_sqshrun_h,
7775 gen_sqshrun_s,
7776 };
7777 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN)
7778
7779 static WideShiftImmFn * const sqrshrn_fns[] = {
7780 gen_sqrshrn_b,
7781 gen_sqrshrn_h,
7782 gen_sqrshrn_s,
7783 };
7784 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN)
7785
7786 static WideShiftImmFn * const uqrshrn_fns[] = {
7787 gen_uqrshrn_b,
7788 gen_uqrshrn_h,
7789 gen_uqrshrn_s,
7790 };
7791 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0)
7792
7793 static WideShiftImmFn * const sqrshrun_fns[] = {
7794 gen_sqrshrun_b,
7795 gen_sqrshrun_h,
7796 gen_sqrshrun_s,
7797 };
7798 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN)
7799
7800 /*
7801 * Advanced SIMD Scalar Shift by Immediate
7802 */
7803
7804 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a,
7805 WideShiftImmFn *fn, bool accumulate,
7806 MemOp sign)
7807 {
7808 if (fp_access_check(s)) {
7809 TCGv_i64 rd = tcg_temp_new_i64();
7810 TCGv_i64 rn = tcg_temp_new_i64();
7811
7812 read_vec_element(s, rn, a->rn, 0, a->esz | sign);
7813 if (accumulate) {
7814 read_vec_element(s, rd, a->rd, 0, a->esz | sign);
7815 }
7816 fn(rd, rn, a->imm);
7817 write_fp_dreg(s, a->rd, rd);
7818 }
7819 return true;
7820 }
7821
7822 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0)
7823 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0)
7824 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0)
7825 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0)
7826 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0)
7827 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0)
7828 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0)
7829 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0)
7830 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0)
7831
7832 TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0)
7833 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0)
7834
7835 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i,
7836 NeonGenTwoOpEnvFn *fn)
7837 {
7838 TCGv_i32 t = tcg_temp_new_i32();
7839 tcg_gen_extrl_i64_i32(t, s);
7840 fn(t, tcg_env, t, tcg_constant_i32(i));
7841 tcg_gen_extu_i32_i64(d, t);
7842 }
7843
7844 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7845 {
7846 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8);
7847 }
7848
7849 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7850 {
7851 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16);
7852 }
7853
7854 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7855 {
7856 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32);
7857 }
7858
7859 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7860 {
7861 gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i));
7862 }
7863
7864 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7865 {
7866 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8);
7867 }
7868
7869 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7870 {
7871 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16);
7872 }
7873
7874 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7875 {
7876 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32);
7877 }
7878
7879 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7880 {
7881 gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i));
7882 }
7883
7884 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7885 {
7886 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8);
7887 }
7888
7889 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7890 {
7891 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16);
7892 }
7893
7894 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7895 {
7896 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32);
7897 }
7898
7899 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7900 {
7901 gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i));
7902 }
7903
7904 static WideShiftImmFn * const f_scalar_sqshli[] = {
7905 gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d
7906 };
7907
7908 static WideShiftImmFn * const f_scalar_uqshli[] = {
7909 gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d
7910 };
7911
7912 static WideShiftImmFn * const f_scalar_sqshlui[] = {
7913 gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d
7914 };
7915
7916 /* Note that the helpers sign-extend their inputs, so don't do it here. */
7917 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0)
7918 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0)
7919 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0)
7920
7921 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a,
7922 WideShiftImmFn * const fns[3],
7923 MemOp sign, bool zext)
7924 {
7925 MemOp esz = a->esz;
7926
7927 tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7928
7929 if (fp_access_check(s)) {
7930 TCGv_i64 rd = tcg_temp_new_i64();
7931 TCGv_i64 rn = tcg_temp_new_i64();
7932
7933 read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign);
7934 fns[esz](rd, rn, a->imm);
7935 if (zext) {
7936 tcg_gen_ext_i64(rd, rd, esz);
7937 }
7938 write_fp_dreg(s, a->rd, rd);
7939 }
7940 return true;
7941 }
7942
7943 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true)
7944 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true)
7945 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false)
7946 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false)
7947 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false)
7948 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false)
7949
7950 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed)
7951 {
7952 TCGv_i64 tcg_n, tcg_m, tcg_rd;
7953 tcg_rd = cpu_reg(s, a->rd);
7954
7955 if (!a->sf && is_signed) {
7956 tcg_n = tcg_temp_new_i64();
7957 tcg_m = tcg_temp_new_i64();
7958 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn));
7959 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm));
7960 } else {
7961 tcg_n = read_cpu_reg(s, a->rn, a->sf);
7962 tcg_m = read_cpu_reg(s, a->rm, a->sf);
7963 }
7964
7965 if (is_signed) {
7966 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
7967 } else {
7968 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
7969 }
7970
7971 if (!a->sf) { /* zero extend final result */
7972 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7973 }
7974 return true;
7975 }
7976
7977 TRANS(SDIV, do_div, a, true)
7978 TRANS(UDIV, do_div, a, false)
7979
7980 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
7981 * Note that it is the caller's responsibility to ensure that the
7982 * shift amount is in range (i.e. 0..31 or 0..63) and to provide the
7983 * ARM-mandated semantics for out-of-range shifts.
7984 */
7985 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
7986 enum a64_shift_type shift_type, TCGv_i64 shift_amount)
7987 {
7988 switch (shift_type) {
7989 case A64_SHIFT_TYPE_LSL:
7990 tcg_gen_shl_i64(dst, src, shift_amount);
7991 break;
7992 case A64_SHIFT_TYPE_LSR:
7993 tcg_gen_shr_i64(dst, src, shift_amount);
7994 break;
7995 case A64_SHIFT_TYPE_ASR:
7996 if (!sf) {
7997 tcg_gen_ext32s_i64(dst, src);
7998 }
7999 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
8000 break;
8001 case A64_SHIFT_TYPE_ROR:
8002 if (sf) {
8003 tcg_gen_rotr_i64(dst, src, shift_amount);
8004 } else {
8005 TCGv_i32 t0, t1;
8006 t0 = tcg_temp_new_i32();
8007 t1 = tcg_temp_new_i32();
8008 tcg_gen_extrl_i64_i32(t0, src);
8009 tcg_gen_extrl_i64_i32(t1, shift_amount);
8010 tcg_gen_rotr_i32(t0, t0, t1);
8011 tcg_gen_extu_i32_i64(dst, t0);
8012 }
8013 break;
8014 default:
8015 assert(FALSE); /* all shift types should be handled */
8016 break;
8017 }
8018
8019 if (!sf) { /* zero extend final result */
8020 tcg_gen_ext32u_i64(dst, dst);
8021 }
8022 }
8023
8024 /* Shift a TCGv src by immediate, put result in dst.
8025 * The shift amount must be in range (this should always be true as the
8026 * relevant instructions will UNDEF on bad shift immediates).
8027 */
8028 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
8029 enum a64_shift_type shift_type, unsigned int shift_i)
8030 {
8031 assert(shift_i < (sf ? 64 : 32));
8032
8033 if (shift_i == 0) {
8034 tcg_gen_mov_i64(dst, src);
8035 } else {
8036 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
8037 }
8038 }
8039
8040 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a,
8041 enum a64_shift_type shift_type)
8042 {
8043 TCGv_i64 tcg_shift = tcg_temp_new_i64();
8044 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8045 TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8046
8047 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31);
8048 shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift);
8049 return true;
8050 }
8051
8052 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL)
8053 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR)
8054 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR)
8055 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR)
8056
8057 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c)
8058 {
8059 TCGv_i64 tcg_acc, tcg_val, tcg_rd;
8060 TCGv_i32 tcg_bytes;
8061
8062 switch (a->esz) {
8063 case MO_8:
8064 case MO_16:
8065 case MO_32:
8066 tcg_val = tcg_temp_new_i64();
8067 tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz);
8068 break;
8069 case MO_64:
8070 tcg_val = cpu_reg(s, a->rm);
8071 break;
8072 default:
8073 g_assert_not_reached();
8074 }
8075 tcg_acc = cpu_reg(s, a->rn);
8076 tcg_bytes = tcg_constant_i32(1 << a->esz);
8077 tcg_rd = cpu_reg(s, a->rd);
8078
8079 if (crc32c) {
8080 gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
8081 } else {
8082 gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
8083 }
8084 return true;
8085 }
8086
8087 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false)
8088 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true)
8089
8090 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag)
8091 {
8092 TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true);
8093 TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true);
8094 TCGv_i64 tcg_d = cpu_reg(s, a->rd);
8095
8096 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
8097 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
8098
8099 if (setflag) {
8100 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
8101 } else {
8102 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
8103 }
8104 return true;
8105 }
8106
8107 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false)
8108 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true)
8109
8110 static bool trans_IRG(DisasContext *s, arg_rrr *a)
8111 {
8112 if (dc_isar_feature(aa64_mte_insn_reg, s)) {
8113 TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd);
8114 TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn);
8115
8116 if (s->ata[0]) {
8117 gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm));
8118 } else {
8119 gen_address_with_allocation_tag0(tcg_rd, tcg_rn);
8120 }
8121 return true;
8122 }
8123 return false;
8124 }
8125
8126 static bool trans_GMI(DisasContext *s, arg_rrr *a)
8127 {
8128 if (dc_isar_feature(aa64_mte_insn_reg, s)) {
8129 TCGv_i64 t = tcg_temp_new_i64();
8130
8131 tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4);
8132 tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
8133 tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t);
8134 return true;
8135 }
8136 return false;
8137 }
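/*
 * Example: if the allocation tag in bits [59:56] of Xn is 5, GMI produces
 * Xd = Xm | (1 << 5), i.e. it adds that tag to the exclusion mask.
 */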
8138
8139 static bool trans_PACGA(DisasContext *s, arg_rrr *a)
8140 {
8141 if (dc_isar_feature(aa64_pauth, s)) {
8142 gen_helper_pacga(cpu_reg(s, a->rd), tcg_env,
8143 cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm));
8144 return true;
8145 }
8146 return false;
8147 }
8148
8149 typedef void ArithOneOp(TCGv_i64, TCGv_i64);
8150
8151 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn)
8152 {
8153 fn(cpu_reg(s, rd), cpu_reg(s, rn));
8154 return true;
8155 }
8156
8157 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8158 {
8159 TCGv_i32 t32 = tcg_temp_new_i32();
8160
8161 tcg_gen_extrl_i64_i32(t32, tcg_rn);
8162 gen_helper_rbit(t32, t32);
8163 tcg_gen_extu_i32_i64(tcg_rd, t32);
8164 }
8165
8166 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask)
8167 {
8168 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8169
8170 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
8171 tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
8172 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
8173 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
8174 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
8175 }
8176
8177 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8178 {
8179 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff));
8180 }
8181
8182 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8183 {
8184 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull));
8185 }
8186
8187 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8188 {
8189 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
8190 }
8191
8192 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8193 {
8194 tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
8195 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
8196 }
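/*
 * Example: REV32 on 0x0011223344556677 yields 0x3322110077665544, i.e.
 * the bytes are reversed within each 32-bit half of the register.
 */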
8197
8198 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32)
8199 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32)
8200 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32)
8201 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64)
8202
8203 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8204 {
8205 TCGv_i32 t32 = tcg_temp_new_i32();
8206
8207 tcg_gen_extrl_i64_i32(t32, tcg_rn);
8208 tcg_gen_clzi_i32(t32, t32, 32);
8209 tcg_gen_extu_i32_i64(tcg_rd, t32);
8210 }
8211
8212 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8213 {
8214 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
8215 }
8216
8217 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8218 {
8219 TCGv_i32 t32 = tcg_temp_new_i32();
8220
8221 tcg_gen_extrl_i64_i32(t32, tcg_rn);
8222 tcg_gen_clrsb_i32(t32, t32);
8223 tcg_gen_extu_i32_i64(tcg_rd, t32);
8224 }
8225
8226 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32)
8227 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32)
8228
8229 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn)
8230 {
8231 TCGv_i64 tcg_rd, tcg_rn;
8232
8233 if (a->z) {
8234 if (a->rn != 31) {
8235 return false;
8236 }
8237 tcg_rn = tcg_constant_i64(0);
8238 } else {
8239 tcg_rn = cpu_reg_sp(s, a->rn);
8240 }
8241 if (s->pauth_active) {
8242 tcg_rd = cpu_reg(s, a->rd);
8243 fn(tcg_rd, tcg_env, tcg_rd, tcg_rn);
8244 }
8245 return true;
8246 }
8247
8248 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia)
8249 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib)
8250 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda)
8251 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb)
8252
8253 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia)
8254 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib)
8255 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda)
8256 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb)
8257
8258 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn)
8259 {
8260 if (s->pauth_active) {
8261 TCGv_i64 tcg_rd = cpu_reg(s, rd);
8262 fn(tcg_rd, tcg_env, tcg_rd);
8263 }
8264 return true;
8265 }
8266
8267 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci)
8268 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd)
8269
8270 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a,
8271 ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags)
8272 {
8273 TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
8274
8275 if (!a->sf && (a->sa & (1 << 5))) {
8276 return false;
8277 }
8278
8279 tcg_rd = cpu_reg(s, a->rd);
8280 tcg_rn = cpu_reg(s, a->rn);
8281
8282 tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8283 if (a->sa) {
8284 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8285 }
8286
8287 (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm);
8288 if (!a->sf) {
8289 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8290 }
8291 if (setflags) {
8292 gen_logic_CC(a->sf, tcg_rd);
8293 }
8294 return true;
8295 }
8296
8297 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a)
8298 {
8299 /*
8300 * Unshifted ORR and ORN with WZR/XZR is the standard encoding for
8301 * register-register MOV and MVN, so it is worth special casing.
8302 */
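/*
 * e.g. MOV X0, X1 assembles as ORR X0, XZR, X1, and MVN W0, W1 as
 * ORN W0, WZR, W1.
 */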
8303 if (a->sa == 0 && a->st == 0 && a->rn == 31) {
8304 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8305 TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8306
8307 if (a->n) {
8308 tcg_gen_not_i64(tcg_rd, tcg_rm);
8309 if (!a->sf) {
8310 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8311 }
8312 } else {
8313 if (a->sf) {
8314 tcg_gen_mov_i64(tcg_rd, tcg_rm);
8315 } else {
8316 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
8317 }
8318 }
8319 return true;
8320 }
8321
8322 return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false);
8323 }
8324
8325 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false)
8326 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true)
8327 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false)
8328
8329 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a,
8330 bool sub_op, bool setflags)
8331 {
8332 TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result;
8333
8334 if (a->sa > 4) {
8335 return false;
8336 }
8337
8338 /* non-flag setting ops may use SP */
8339 if (!setflags) {
8340 tcg_rd = cpu_reg_sp(s, a->rd);
8341 } else {
8342 tcg_rd = cpu_reg(s, a->rd);
8343 }
8344 tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf);
8345
8346 tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8347 ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa);
8348
8349 tcg_result = tcg_temp_new_i64();
8350 if (!setflags) {
8351 if (sub_op) {
8352 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8353 } else {
8354 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8355 }
8356 } else {
8357 if (sub_op) {
8358 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8359 } else {
8360 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8361 }
8362 }
8363
8364 if (a->sf) {
8365 tcg_gen_mov_i64(tcg_rd, tcg_result);
8366 } else {
8367 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8368 }
8369 return true;
8370 }
8371
8372 TRANS(ADD_ext, do_addsub_ext, a, false, false)
8373 TRANS(SUB_ext, do_addsub_ext, a, true, false)
8374 TRANS(ADDS_ext, do_addsub_ext, a, false, true)
8375 TRANS(SUBS_ext, do_addsub_ext, a, true, true)
8376
8377 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a,
8378 bool sub_op, bool setflags)
8379 {
8380 TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result;
8381
8382 if (a->st == 3 || (!a->sf && (a->sa & 32))) {
8383 return false;
8384 }
8385
8386 tcg_rd = cpu_reg(s, a->rd);
8387 tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8388 tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8389
8390 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8391
8392 tcg_result = tcg_temp_new_i64();
8393 if (!setflags) {
8394 if (sub_op) {
8395 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8396 } else {
8397 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8398 }
8399 } else {
8400 if (sub_op) {
8401 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8402 } else {
8403 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8404 }
8405 }
8406
8407 if (a->sf) {
8408 tcg_gen_mov_i64(tcg_rd, tcg_result);
8409 } else {
8410 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8411 }
8412 return true;
8413 }
8414
8415 TRANS(ADD_r, do_addsub_reg, a, false, false)
8416 TRANS(SUB_r, do_addsub_reg, a, true, false)
8417 TRANS(ADDS_r, do_addsub_reg, a, false, true)
8418 TRANS(SUBS_r, do_addsub_reg, a, true, true)
8419
8420 static bool do_mulh(DisasContext *s, arg_rrr *a,
8421 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
8422 {
8423 TCGv_i64 discard = tcg_temp_new_i64();
8424 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8425 TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8426 TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8427
8428 fn(discard, tcg_rd, tcg_rn, tcg_rm);
8429 return true;
8430 }
8431
8432 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64)
8433 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64)
8434
8435 static bool do_muladd(DisasContext *s, arg_rrrr *a,
8436 bool sf, bool is_sub, MemOp mop)
8437 {
8438 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8439 TCGv_i64 tcg_op1, tcg_op2;
8440
8441 if (mop == MO_64) {
8442 tcg_op1 = cpu_reg(s, a->rn);
8443 tcg_op2 = cpu_reg(s, a->rm);
8444 } else {
8445 tcg_op1 = tcg_temp_new_i64();
8446 tcg_op2 = tcg_temp_new_i64();
8447 tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop);
8448 tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop);
8449 }
8450
8451 if (a->ra == 31 && !is_sub) {
8452 /* Special-case MADD with rA == XZR; it is the standard MUL alias */
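/* e.g. MUL Xd, Xn, Xm is encoded as MADD Xd, Xn, Xm, XZR. */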
8453 tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2);
8454 } else {
8455 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8456 TCGv_i64 tcg_ra = cpu_reg(s, a->ra);
8457
8458 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
8459 if (is_sub) {
8460 tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp);
8461 } else {
8462 tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp);
8463 }
8464 }
8465
8466 if (!sf) {
8467 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8468 }
8469 return true;
8470 }
8471
8472 TRANS(MADD_w, do_muladd, a, false, false, MO_64)
8473 TRANS(MSUB_w, do_muladd, a, false, true, MO_64)
8474 TRANS(MADD_x, do_muladd, a, true, false, MO_64)
8475 TRANS(MSUB_x, do_muladd, a, true, true, MO_64)
8476
8477 TRANS(SMADDL, do_muladd, a, true, false, MO_SL)
8478 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL)
8479 TRANS(UMADDL, do_muladd, a, true, false, MO_UL)
8480 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL)
8481
8482 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a,
8483 bool is_sub, bool setflags)
8484 {
8485 TCGv_i64 tcg_y, tcg_rn, tcg_rd;
8486
8487 tcg_rd = cpu_reg(s, a->rd);
8488 tcg_rn = cpu_reg(s, a->rn);
8489
8490 if (is_sub) {
8491 tcg_y = tcg_temp_new_i64();
8492 tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm));
8493 } else {
8494 tcg_y = cpu_reg(s, a->rm);
8495 }
8496
8497 if (setflags) {
8498 gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y);
8499 } else {
8500 gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y);
8501 }
8502 return true;
8503 }
8504
8505 TRANS(ADC, do_adc_sbc, a, false, false)
8506 TRANS(SBC, do_adc_sbc, a, true, false)
8507 TRANS(ADCS, do_adc_sbc, a, false, true)
8508 TRANS(SBCS, do_adc_sbc, a, true, true)
8509
8510 static bool trans_RMIF(DisasContext *s, arg_RMIF *a)
8511 {
8512 int mask = a->mask;
8513 TCGv_i64 tcg_rn;
8514 TCGv_i32 nzcv;
8515
8516 if (!dc_isar_feature(aa64_condm_4, s)) {
8517 return false;
8518 }
8519
8520 tcg_rn = read_cpu_reg(s, a->rn, 1);
8521 tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm);
8522
8523 nzcv = tcg_temp_new_i32();
8524 tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
8525
8526 if (mask & 8) { /* N */
8527 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
8528 }
8529 if (mask & 4) { /* Z */
8530 tcg_gen_not_i32(cpu_ZF, nzcv);
8531 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
8532 }
8533 if (mask & 2) { /* C */
8534 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
8535 }
8536 if (mask & 1) { /* V */
8537 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
8538 }
8539 return true;
8540 }
8541
8542 static bool do_setf(DisasContext *s, int rn, int shift)
8543 {
8544 TCGv_i32 tmp = tcg_temp_new_i32();
8545
8546 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
8547 tcg_gen_shli_i32(cpu_NF, tmp, shift);
8548 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
8549 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
8550 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
8551 return true;
8552 }
8553
8554 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24)
8555 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16)
8556
8557 /* CCMP, CCMN */
8558 static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
8559 {
8560 TCGv_i32 tcg_t0 = tcg_temp_new_i32();
8561 TCGv_i32 tcg_t1 = tcg_temp_new_i32();
8562 TCGv_i32 tcg_t2 = tcg_temp_new_i32();
8563 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8564 TCGv_i64 tcg_rn, tcg_y;
8565 DisasCompare c;
8566 unsigned nzcv;
8567 bool has_andc;
8568
8569 /* Set T0 = !COND. */
8570 arm_test_cc(&c, a->cond);
8571 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
8572
8573 /* Load the arguments for the new comparison. */
8574 if (a->imm) {
8575 tcg_y = tcg_constant_i64(a->y);
8576 } else {
8577 tcg_y = cpu_reg(s, a->y);
8578 }
8579 tcg_rn = cpu_reg(s, a->rn);
8580
8581 /* Set the flags for the new comparison. */
8582 if (a->op) {
8583 gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8584 } else {
8585 gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8586 }
8587
8588 /*
8589 * If COND was false, force the flags to #nzcv. Compute two masks
8590 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
8591 * For tcg hosts that support ANDC, we can make do with just T1.
8592 * In either case, allow the tcg optimizer to delete any unused mask.
8593 */
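/*
 * Example: if COND is false and a->nzcv == 0b0100, the masks below force
 * N = 0, Z = 1, C = 0 and V = 0; when COND is true the freshly computed
 * flags pass through unchanged.
 */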
8594 tcg_gen_neg_i32(tcg_t1, tcg_t0);
8595 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
8596
8597 nzcv = a->nzcv;
8598 has_andc = tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0);
8599 if (nzcv & 8) { /* N */
8600 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
8601 } else {
8602 if (has_andc) {
8603 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
8604 } else {
8605 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
8606 }
8607 }
8608 if (nzcv & 4) { /* Z */
8609 if (has_andc) {
8610 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
8611 } else {
8612 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
8613 }
8614 } else {
8615 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
8616 }
8617 if (nzcv & 2) { /* C */
8618 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
8619 } else {
8620 if (has_andc) {
8621 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
8622 } else {
8623 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
8624 }
8625 }
8626 if (nzcv & 1) { /* V */
8627 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
8628 } else {
8629 if (has_andc) {
8630 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
8631 } else {
8632 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
8633 }
8634 }
8635 return true;
8636 }
8637
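/*
 * CSEL, CSINC, CSINV and CSNEG all come through here, distinguished by
 * else_inc/else_inv which say how the "false" operand is transformed.
 * When Rn == Rm == XZR and exactly one of the two flags is set the
 * result is 0/1 or 0/-1, so we can use setcond/negsetcond directly;
 * these are the CSET and CSETM aliases.
 */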
8638 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8639 {
8640 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8641 TCGv_i64 zero = tcg_constant_i64(0);
8642 DisasCompare64 c;
8643
8644 a64_test_cc(&c, a->cond);
8645
8646 if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) {
8647 /* CSET & CSETM. */
8648 if (a->else_inv) {
8649 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
8650 tcg_rd, c.value, zero);
8651 } else {
8652 tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
8653 tcg_rd, c.value, zero);
8654 }
8655 } else {
8656 TCGv_i64 t_true = cpu_reg(s, a->rn);
8657 TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1);
8658
8659 if (a->else_inv && a->else_inc) {
8660 tcg_gen_neg_i64(t_false, t_false);
8661 } else if (a->else_inv) {
8662 tcg_gen_not_i64(t_false, t_false);
8663 } else if (a->else_inc) {
8664 tcg_gen_addi_i64(t_false, t_false, 1);
8665 }
8666 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
8667 }
8668
8669 if (!a->sf) {
8670 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8671 }
8672 return true;
8673 }
8674
8675 typedef struct FPScalar1Int {
8676 void (*gen_h)(TCGv_i32, TCGv_i32);
8677 void (*gen_s)(TCGv_i32, TCGv_i32);
8678 void (*gen_d)(TCGv_i64, TCGv_i64);
8679 } FPScalar1Int;
8680
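/*
 * Single-operand ops that only manipulate the bit pattern (FMOV, FABS,
 * FNEG) and so need no fp_status. The generator is chosen by element
 * size; "merging" selects the writeback variant that can preserve the
 * remaining elements of the destination (intended for FPCR.NEP-style
 * merging), while the plain writeback zeroes them.
 */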
8681 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
8682 const FPScalar1Int *f,
8683 bool merging)
8684 {
8685 switch (a->esz) {
8686 case MO_64:
8687 if (fp_access_check(s)) {
8688 TCGv_i64 t = read_fp_dreg(s, a->rn);
8689 f->gen_d(t, t);
8690 if (merging) {
8691 write_fp_dreg_merging(s, a->rd, a->rd, t);
8692 } else {
8693 write_fp_dreg(s, a->rd, t);
8694 }
8695 }
8696 break;
8697 case MO_32:
8698 if (fp_access_check(s)) {
8699 TCGv_i32 t = read_fp_sreg(s, a->rn);
8700 f->gen_s(t, t);
8701 if (merging) {
8702 write_fp_sreg_merging(s, a->rd, a->rd, t);
8703 } else {
8704 write_fp_sreg(s, a->rd, t);
8705 }
8706 }
8707 break;
8708 case MO_16:
8709 if (!dc_isar_feature(aa64_fp16, s)) {
8710 return false;
8711 }
8712 if (fp_access_check(s)) {
8713 TCGv_i32 t = read_fp_hreg(s, a->rn);
8714 f->gen_h(t, t);
8715 if (merging) {
8716 write_fp_hreg_merging(s, a->rd, a->rd, t);
8717 } else {
8718 write_fp_sreg(s, a->rd, t);
8719 }
8720 }
8721 break;
8722 default:
8723 return false;
8724 }
8725 return true;
8726 }
8727
8728 static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a,
8729 const FPScalar1Int *fnormal,
8730 const FPScalar1Int *fah)
8731 {
8732 return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true);
8733 }
8734
8735 static const FPScalar1Int f_scalar_fmov = {
8736 tcg_gen_mov_i32,
8737 tcg_gen_mov_i32,
8738 tcg_gen_mov_i64,
8739 };
8740 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false)
8741
8742 static const FPScalar1Int f_scalar_fabs = {
8743 gen_vfp_absh,
8744 gen_vfp_abss,
8745 gen_vfp_absd,
8746 };
8747 static const FPScalar1Int f_scalar_ah_fabs = {
8748 gen_vfp_ah_absh,
8749 gen_vfp_ah_abss,
8750 gen_vfp_ah_absd,
8751 };
8752 TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs)
8753
8754 static const FPScalar1Int f_scalar_fneg = {
8755 gen_vfp_negh,
8756 gen_vfp_negs,
8757 gen_vfp_negd,
8758 };
8759 static const FPScalar1Int f_scalar_ah_fneg = {
8760 gen_vfp_ah_negh,
8761 gen_vfp_ah_negs,
8762 gen_vfp_ah_negd,
8763 };
8764 TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg)
8765
8766 typedef struct FPScalar1 {
8767 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);
8768 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr);
8769 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr);
8770 } FPScalar1;
8771
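/*
 * As above, but for single-operand ops that need an fp_status pointer.
 * rmode >= 0 requests a specific rounding mode, installed before the
 * op and restored afterwards; rmode < 0 uses the current FPCR mode.
 */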
8772 static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a,
8773 const FPScalar1 *f, int rmode,
8774 ARMFPStatusFlavour fpsttype)
8775 {
8776 TCGv_i32 tcg_rmode = NULL;
8777 TCGv_ptr fpst;
8778 TCGv_i64 t64;
8779 TCGv_i32 t32;
8780 int check = fp_access_check_scalar_hsd(s, a->esz);
8781
8782 if (check <= 0) {
8783 return check == 0;
8784 }
8785
8786 fpst = fpstatus_ptr(fpsttype);
8787 if (rmode >= 0) {
8788 tcg_rmode = gen_set_rmode(rmode, fpst);
8789 }
8790
8791 switch (a->esz) {
8792 case MO_64:
8793 t64 = read_fp_dreg(s, a->rn);
8794 f->gen_d(t64, t64, fpst);
8795 write_fp_dreg_merging(s, a->rd, a->rd, t64);
8796 break;
8797 case MO_32:
8798 t32 = read_fp_sreg(s, a->rn);
8799 f->gen_s(t32, t32, fpst);
8800 write_fp_sreg_merging(s, a->rd, a->rd, t32);
8801 break;
8802 case MO_16:
8803 t32 = read_fp_hreg(s, a->rn);
8804 f->gen_h(t32, t32, fpst);
8805 write_fp_hreg_merging(s, a->rd, a->rd, t32);
8806 break;
8807 default:
8808 g_assert_not_reached();
8809 }
8810
8811 if (rmode >= 0) {
8812 gen_restore_rmode(tcg_rmode, fpst);
8813 }
8814 return true;
8815 }
8816
8817 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
8818 const FPScalar1 *f, int rmode)
8819 {
8820 return do_fp1_scalar_with_fpsttype(s, a, f, rmode,
8821 a->esz == MO_16 ?
8822 FPST_A64_F16 : FPST_A64);
8823 }
8824
8825 static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a,
8826 const FPScalar1 *f, int rmode)
8827 {
8828 return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz));
8829 }
8830
8831 static const FPScalar1 f_scalar_fsqrt = {
8832 gen_helper_vfp_sqrth,
8833 gen_helper_vfp_sqrts,
8834 gen_helper_vfp_sqrtd,
8835 };
8836 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1)
8837
8838 static const FPScalar1 f_scalar_frint = {
8839 gen_helper_advsimd_rinth,
8840 gen_helper_rints,
8841 gen_helper_rintd,
8842 };
8843 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
8844 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF)
8845 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF)
8846 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO)
8847 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
8848 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1)
8849
8850 static const FPScalar1 f_scalar_frintx = {
8851 gen_helper_advsimd_rinth_exact,
8852 gen_helper_rints_exact,
8853 gen_helper_rintd_exact,
8854 };
8855 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
8856
8857 static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a)
8858 {
8859 ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64;
8860 TCGv_i32 t32;
8861 int check;
8862
8863 if (!dc_isar_feature(aa64_bf16, s)) {
8864 return false;
8865 }
8866
8867 check = fp_access_check_scalar_hsd(s, a->esz);
8868
8869 if (check <= 0) {
8870 return check == 0;
8871 }
8872
8873 t32 = read_fp_sreg(s, a->rn);
8874 gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype));
8875 write_fp_hreg_merging(s, a->rd, a->rd, t32);
8876 return true;
8877 }
8878
8879 static const FPScalar1 f_scalar_frint32 = {
8880 NULL,
8881 gen_helper_frint32_s,
8882 gen_helper_frint32_d,
8883 };
8884 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a,
8885 &f_scalar_frint32, FPROUNDING_ZERO)
8886 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1)
8887
8888 static const FPScalar1 f_scalar_frint64 = {
8889 NULL,
8890 gen_helper_frint64_s,
8891 gen_helper_frint64_d,
8892 };
8893 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a,
8894 &f_scalar_frint64, FPROUNDING_ZERO)
8895 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1)
8896
8897 static const FPScalar1 f_scalar_frecpe = {
8898 gen_helper_recpe_f16,
8899 gen_helper_recpe_f32,
8900 gen_helper_recpe_f64,
8901 };
8902 static const FPScalar1 f_scalar_frecpe_rpres = {
8903 gen_helper_recpe_f16,
8904 gen_helper_recpe_rpres_f32,
8905 gen_helper_recpe_f64,
8906 };
8907 TRANS(FRECPE_s, do_fp1_scalar_ah, a,
8908 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
8909 &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1)
8910
8911 static const FPScalar1 f_scalar_frecpx = {
8912 gen_helper_frecpx_f16,
8913 gen_helper_frecpx_f32,
8914 gen_helper_frecpx_f64,
8915 };
8916 TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1)
8917
8918 static const FPScalar1 f_scalar_frsqrte = {
8919 gen_helper_rsqrte_f16,
8920 gen_helper_rsqrte_f32,
8921 gen_helper_rsqrte_f64,
8922 };
8923 static const FPScalar1 f_scalar_frsqrte_rpres = {
8924 gen_helper_rsqrte_f16,
8925 gen_helper_rsqrte_rpres_f32,
8926 gen_helper_rsqrte_f64,
8927 };
8928 TRANS(FRSQRTE_s, do_fp1_scalar_ah, a,
8929 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
8930 &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1)
8931
8932 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
8933 {
8934 if (fp_access_check(s)) {
8935 TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn);
8936 TCGv_i64 tcg_rd = tcg_temp_new_i64();
8937 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8938
8939 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst);
8940 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
8941 }
8942 return true;
8943 }
8944
8945 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a)
8946 {
8947 if (fp_access_check(s)) {
8948 TCGv_i32 tmp = read_fp_sreg(s, a->rn);
8949 TCGv_i32 ahp = get_ahp_flag();
8950 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8951
8952 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
8953 /* write_fp_hreg_merging is OK here because top half of result is zero */
8954 write_fp_hreg_merging(s, a->rd, a->rd, tmp);
8955 }
8956 return true;
8957 }
8958
8959 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
8960 {
8961 if (fp_access_check(s)) {
8962 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8963 TCGv_i32 tcg_rd = tcg_temp_new_i32();
8964 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8965
8966 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst);
8967 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
8968 }
8969 return true;
8970 }
8971
8972 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
8973 {
8974 if (fp_access_check(s)) {
8975 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8976 TCGv_i32 tcg_rd = tcg_temp_new_i32();
8977 TCGv_i32 ahp = get_ahp_flag();
8978 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8979
8980 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
8981 /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */
8982 write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd);
8983 }
8984 return true;
8985 }
8986
8987 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
8988 {
8989 if (fp_access_check(s)) {
8990 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
8991 TCGv_i32 tcg_rd = tcg_temp_new_i32();
8992 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
8993 TCGv_i32 tcg_ahp = get_ahp_flag();
8994
8995 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
8996 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
8997 }
8998 return true;
8999 }
9000
9001 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
9002 {
9003 if (fp_access_check(s)) {
9004 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
9005 TCGv_i64 tcg_rd = tcg_temp_new_i64();
9006 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
9007 TCGv_i32 tcg_ahp = get_ahp_flag();
9008
9009 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9010 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
9011 }
9012 return true;
9013 }
9014
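/*
 * Common code for the integer-to-FP conversions ([US]CVTF). The source
 * is always presented as a 64-bit value; "shift" is the fixed-point
 * fraction bit count (zero for the plain integer forms), passed
 * through to the softfloat helpers.
 */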
9015 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
9016 TCGv_i64 tcg_int, bool is_signed)
9017 {
9018 TCGv_ptr tcg_fpstatus;
9019 TCGv_i32 tcg_shift, tcg_single;
9020 TCGv_i64 tcg_double;
9021
9022 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9023 tcg_shift = tcg_constant_i32(shift);
9024
9025 switch (esz) {
9026 case MO_64:
9027 tcg_double = tcg_temp_new_i64();
9028 if (is_signed) {
9029 gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
9030 } else {
9031 gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
9032 }
9033 write_fp_dreg_merging(s, rd, rd, tcg_double);
9034 break;
9035
9036 case MO_32:
9037 tcg_single = tcg_temp_new_i32();
9038 if (is_signed) {
9039 gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9040 } else {
9041 gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9042 }
9043 write_fp_sreg_merging(s, rd, rd, tcg_single);
9044 break;
9045
9046 case MO_16:
9047 tcg_single = tcg_temp_new_i32();
9048 if (is_signed) {
9049 gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9050 } else {
9051 gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9052 }
9053 write_fp_hreg_merging(s, rd, rd, tcg_single);
9054 break;
9055
9056 default:
9057 g_assert_not_reached();
9058 }
9059 return true;
9060 }
9061
9062 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed)
9063 {
9064 TCGv_i64 tcg_int;
9065 int check = fp_access_check_scalar_hsd(s, a->esz);
9066
9067 if (check <= 0) {
9068 return check == 0;
9069 }
9070
9071 if (a->sf) {
9072 tcg_int = cpu_reg(s, a->rn);
9073 } else {
9074 tcg_int = read_cpu_reg(s, a->rn, true);
9075 if (is_signed) {
9076 tcg_gen_ext32s_i64(tcg_int, tcg_int);
9077 } else {
9078 tcg_gen_ext32u_i64(tcg_int, tcg_int);
9079 }
9080 }
9081 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
9082 }
9083
9084 TRANS(SCVTF_g, do_cvtf_g, a, true)
9085 TRANS(UCVTF_g, do_cvtf_g, a, false)
9086
9087 /*
9088 * [US]CVTF (vector), scalar version.
9089 * Which sounds weird, but really just means input from fp register
9090 * instead of input from general register. Input and output element
9091 * sizes are always equal.
9092 */
9093 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed)
9094 {
9095 TCGv_i64 tcg_int;
9096 int check = fp_access_check_scalar_hsd(s, a->esz);
9097
9098 if (check <= 0) {
9099 return check == 0;
9100 }
9101
9102 tcg_int = tcg_temp_new_i64();
9103 read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0));
9104 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
9105 }
9106
9107 TRANS(SCVTF_f, do_cvtf_f, a, true)
9108 TRANS(UCVTF_f, do_cvtf_f, a, false)
9109
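/*
 * Common code for the FP-to-integer conversions. "out" encodes the
 * destination size together with MO_SIGN for signedness, "esz" is the
 * source element size, and the result is always widened into a 64-bit
 * TCG value so that callers can write either a general register or a
 * vector element from it.
 */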
9110 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz,
9111 TCGv_i64 tcg_out, int shift, int rn,
9112 ARMFPRounding rmode)
9113 {
9114 TCGv_ptr tcg_fpstatus;
9115 TCGv_i32 tcg_shift, tcg_rmode, tcg_single;
9116
9117 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9118 tcg_shift = tcg_constant_i32(shift);
9119 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
9120
9121 switch (esz) {
9122 case MO_64:
9123 read_vec_element(s, tcg_out, rn, 0, MO_64);
9124 switch (out) {
9125 case MO_64 | MO_SIGN:
9126 gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9127 break;
9128 case MO_64:
9129 gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9130 break;
9131 case MO_32 | MO_SIGN:
9132 gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9133 break;
9134 case MO_32:
9135 gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9136 break;
9137 default:
9138 g_assert_not_reached();
9139 }
9140 break;
9141
9142 case MO_32:
9143 tcg_single = read_fp_sreg(s, rn);
9144 switch (out) {
9145 case MO_64 | MO_SIGN:
9146 gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9147 break;
9148 case MO_64:
9149 gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9150 break;
9151 case MO_32 | MO_SIGN:
9152 gen_helper_vfp_tosls(tcg_single, tcg_single,
9153 tcg_shift, tcg_fpstatus);
9154 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9155 break;
9156 case MO_32:
9157 gen_helper_vfp_touls(tcg_single, tcg_single,
9158 tcg_shift, tcg_fpstatus);
9159 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9160 break;
9161 default:
9162 g_assert_not_reached();
9163 }
9164 break;
9165
9166 case MO_16:
9167 tcg_single = read_fp_hreg(s, rn);
9168 switch (out) {
9169 case MO_64 | MO_SIGN:
9170 gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9171 break;
9172 case MO_64:
9173 gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9174 break;
9175 case MO_32 | MO_SIGN:
9176 gen_helper_vfp_toslh(tcg_single, tcg_single,
9177 tcg_shift, tcg_fpstatus);
9178 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9179 break;
9180 case MO_32:
9181 gen_helper_vfp_toulh(tcg_single, tcg_single,
9182 tcg_shift, tcg_fpstatus);
9183 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9184 break;
9185 case MO_16 | MO_SIGN:
9186 gen_helper_vfp_toshh(tcg_single, tcg_single,
9187 tcg_shift, tcg_fpstatus);
9188 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9189 break;
9190 case MO_16:
9191 gen_helper_vfp_touhh(tcg_single, tcg_single,
9192 tcg_shift, tcg_fpstatus);
9193 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9194 break;
9195 default:
9196 g_assert_not_reached();
9197 }
9198 break;
9199
9200 default:
9201 g_assert_not_reached();
9202 }
9203
9204 gen_restore_rmode(tcg_rmode, tcg_fpstatus);
9205 }
9206
9207 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a,
9208 ARMFPRounding rmode, bool is_signed)
9209 {
9210 TCGv_i64 tcg_int;
9211 int check = fp_access_check_scalar_hsd(s, a->esz);
9212
9213 if (check <= 0) {
9214 return check == 0;
9215 }
9216
9217 tcg_int = cpu_reg(s, a->rd);
9218 do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? MO_SIGN : 0),
9219 a->esz, tcg_int, a->shift, a->rn, rmode);
9220
9221 if (!a->sf) {
9222 tcg_gen_ext32u_i64(tcg_int, tcg_int);
9223 }
9224 return true;
9225 }
9226
9227 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true)
9228 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false)
9229 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true)
9230 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false)
9231 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true)
9232 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false)
9233 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true)
9234 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false)
9235 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true)
9236 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false)
9237
9238 /*
9239 * FCVT* (vector), scalar version.
9240 * Which sounds weird, but really just means output to fp register
9241 * instead of output to general register. Input and output element
9242 * sizes are always equal.
9243 */
9244 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a,
9245 ARMFPRounding rmode, bool is_signed)
9246 {
9247 TCGv_i64 tcg_int;
9248 int check = fp_access_check_scalar_hsd(s, a->esz);
9249
9250 if (check <= 0) {
9251 return check == 0;
9252 }
9253
9254 tcg_int = tcg_temp_new_i64();
9255 do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0),
9256 a->esz, tcg_int, a->shift, a->rn, rmode);
9257
9258 if (!s->fpcr_nep) {
9259 clear_vec(s, a->rd);
9260 }
9261 write_vec_element(s, tcg_int, a->rd, 0, a->esz);
9262 return true;
9263 }
9264
9265 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true)
9266 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false)
9267 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true)
9268 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false)
9269 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true)
9270 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false)
9271 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true)
9272 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false)
9273 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true)
9274 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false)
9275
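/*
 * FJCVTZS: double to 32-bit integer with JavaScript semantics. The
 * helper packs the integer result into the low half of its return
 * value and the value used for ZF into the high half; the remaining
 * flags are simply cleared, since the architecture defines N, C and V
 * as zero for this insn.
 */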
9276 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a)
9277 {
9278 if (!dc_isar_feature(aa64_jscvt, s)) {
9279 return false;
9280 }
9281 if (fp_access_check(s)) {
9282 TCGv_i64 t = read_fp_dreg(s, a->rn);
9283 TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64);
9284
9285 gen_helper_fjcvtzs(t, t, fpstatus);
9286
9287 tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t);
9288 tcg_gen_extrh_i64_i32(cpu_ZF, t);
9289 tcg_gen_movi_i32(cpu_CF, 0);
9290 tcg_gen_movi_i32(cpu_NF, 0);
9291 tcg_gen_movi_i32(cpu_VF, 0);
9292 }
9293 return true;
9294 }
9295
9296 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a)
9297 {
9298 if (!dc_isar_feature(aa64_fp16, s)) {
9299 return false;
9300 }
9301 if (fp_access_check(s)) {
9302 TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9303 TCGv_i64 tmp = tcg_temp_new_i64();
9304 tcg_gen_ext16u_i64(tmp, tcg_rn);
9305 write_fp_dreg(s, a->rd, tmp);
9306 }
9307 return true;
9308 }
9309
9310 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a)
9311 {
9312 if (fp_access_check(s)) {
9313 TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9314 TCGv_i64 tmp = tcg_temp_new_i64();
9315 tcg_gen_ext32u_i64(tmp, tcg_rn);
9316 write_fp_dreg(s, a->rd, tmp);
9317 }
9318 return true;
9319 }
9320
9321 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a)
9322 {
9323 if (fp_access_check(s)) {
9324 TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9325 write_fp_dreg(s, a->rd, tcg_rn);
9326 }
9327 return true;
9328 }
9329
9330 static bool trans_FMOV_ux(DisasContext *s, arg_rr *a)
9331 {
9332 if (fp_access_check(s)) {
9333 TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9334 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd));
9335 clear_vec_high(s, true, a->rd);
9336 }
9337 return true;
9338 }
9339
9340 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a)
9341 {
9342 if (!dc_isar_feature(aa64_fp16, s)) {
9343 return false;
9344 }
9345 if (fp_access_check(s)) {
9346 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9347 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16));
9348 }
9349 return true;
9350 }
9351
9352 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a)
9353 {
9354 if (fp_access_check(s)) {
9355 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9356 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32));
9357 }
9358 return true;
9359 }
9360
9361 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a)
9362 {
9363 if (fp_access_check(s)) {
9364 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9365 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64));
9366 }
9367 return true;
9368 }
9369
9370 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a)
9371 {
9372 if (fp_access_check(s)) {
9373 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9374 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn));
9375 }
9376 return true;
9377 }
9378
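/*
 * Single-operand saturating ops (SQABS, SQNEG). The helpers need
 * tcg_env so they can set the cumulative saturation flag (FPSR.QC),
 * hence the separate "ENV" table type.
 */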
9379 typedef struct ENVScalar1 {
9380 NeonGenOneOpEnvFn *gen_bhs[3];
9381 NeonGenOne64OpEnvFn *gen_d;
9382 } ENVScalar1;
9383
9384 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f)
9385 {
9386 if (!fp_access_check(s)) {
9387 return true;
9388 }
9389 if (a->esz == MO_64) {
9390 TCGv_i64 t = read_fp_dreg(s, a->rn);
9391 f->gen_d(t, tcg_env, t);
9392 write_fp_dreg(s, a->rd, t);
9393 } else {
9394 TCGv_i32 t = tcg_temp_new_i32();
9395
9396 read_vec_element_i32(s, t, a->rn, 0, a->esz);
9397 f->gen_bhs[a->esz](t, tcg_env, t);
9398 write_fp_sreg(s, a->rd, t);
9399 }
9400 return true;
9401 }
9402
9403 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f)
9404 {
9405 if (a->esz == MO_64 && !a->q) {
9406 return false;
9407 }
9408 if (!fp_access_check(s)) {
9409 return true;
9410 }
9411 if (a->esz == MO_64) {
9412 TCGv_i64 t = tcg_temp_new_i64();
9413
9414 for (int i = 0; i < 2; ++i) {
9415 read_vec_element(s, t, a->rn, i, MO_64);
9416 f->gen_d(t, tcg_env, t);
9417 write_vec_element(s, t, a->rd, i, MO_64);
9418 }
9419 } else {
9420 TCGv_i32 t = tcg_temp_new_i32();
9421 int n = (a->q ? 16 : 8) >> a->esz;
9422
9423 for (int i = 0; i < n; ++i) {
9424 read_vec_element_i32(s, t, a->rn, i, a->esz);
9425 f->gen_bhs[a->esz](t, tcg_env, t);
9426 write_vec_element_i32(s, t, a->rd, i, a->esz);
9427 }
9428 }
9429 clear_vec_high(s, a->q, a->rd);
9430 return true;
9431 }
9432
9433 static const ENVScalar1 f_scalar_sqabs = {
9434 { gen_helper_neon_qabs_s8,
9435 gen_helper_neon_qabs_s16,
9436 gen_helper_neon_qabs_s32 },
9437 gen_helper_neon_qabs_s64,
9438 };
9439 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs)
9440 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs)
9441
9442 static const ENVScalar1 f_scalar_sqneg = {
9443 { gen_helper_neon_qneg_s8,
9444 gen_helper_neon_qneg_s16,
9445 gen_helper_neon_qneg_s32 },
9446 gen_helper_neon_qneg_s64,
9447 };
9448 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg)
9449 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg)
9450
9451 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f)
9452 {
9453 if (fp_access_check(s)) {
9454 TCGv_i64 t = read_fp_dreg(s, a->rn);
9455 f(t, t);
9456 write_fp_dreg(s, a->rd, t);
9457 }
9458 return true;
9459 }
9460
9461 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64)
9462 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64)
9463
9464 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond)
9465 {
9466 if (fp_access_check(s)) {
9467 TCGv_i64 t = read_fp_dreg(s, a->rn);
9468 tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0));
9469 write_fp_dreg(s, a->rd, t);
9470 }
9471 return true;
9472 }
9473
9474 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT)
9475 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE)
9476 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE)
9477 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT)
9478 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ)
9479
9480 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a,
9481 ArithOneOp * const fn[3])
9482 {
9483 if (a->esz == MO_64) {
9484 return false;
9485 }
9486 if (fp_access_check(s)) {
9487 TCGv_i64 t = tcg_temp_new_i64();
9488
9489 read_vec_element(s, t, a->rn, 0, a->esz + 1);
9490 fn[a->esz](t, t);
9491 clear_vec(s, a->rd);
9492 write_vec_element(s, t, a->rd, 0, a->esz);
9493 }
9494 return true;
9495 }
9496
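/*
 * Adapt narrowing helpers that take tcg_env (they may set FPSR.QC on
 * saturation) to the plain two-argument ArithOneOp signature used by
 * the tables below.
 */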
9497 #define WRAP_ENV(NAME) \
9498 static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \
9499 { gen_helper_##NAME(d, tcg_env, n); }
9500
9501 WRAP_ENV(neon_unarrow_sat8)
9502 WRAP_ENV(neon_unarrow_sat16)
9503 WRAP_ENV(neon_unarrow_sat32)
9504
9505 static ArithOneOp * const f_scalar_sqxtun[] = {
9506 gen_neon_unarrow_sat8,
9507 gen_neon_unarrow_sat16,
9508 gen_neon_unarrow_sat32,
9509 };
9510 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun)
9511
9512 WRAP_ENV(neon_narrow_sat_s8)
9513 WRAP_ENV(neon_narrow_sat_s16)
9514 WRAP_ENV(neon_narrow_sat_s32)
9515
9516 static ArithOneOp * const f_scalar_sqxtn[] = {
9517 gen_neon_narrow_sat_s8,
9518 gen_neon_narrow_sat_s16,
9519 gen_neon_narrow_sat_s32,
9520 };
9521 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn)
9522
9523 WRAP_ENV(neon_narrow_sat_u8)
9524 WRAP_ENV(neon_narrow_sat_u16)
9525 WRAP_ENV(neon_narrow_sat_u32)
9526
9527 static ArithOneOp * const f_scalar_uqxtn[] = {
9528 gen_neon_narrow_sat_u8,
9529 gen_neon_narrow_sat_u16,
9530 gen_neon_narrow_sat_u32,
9531 };
9532 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn)
9533
9534 static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a)
9535 {
9536 if (fp_access_check(s)) {
9537 /*
9538 * 64 bit to 32 bit float conversion
9539 * with von Neumann rounding (round to odd)
9540 */
9541 TCGv_i64 src = read_fp_dreg(s, a->rn);
9542 TCGv_i32 dst = tcg_temp_new_i32();
9543 gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64));
9544 write_fp_sreg_merging(s, a->rd, a->rd, dst);
9545 }
9546 return true;
9547 }
9548
9549 #undef WRAP_ENV
9550
9551 static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9552 {
9553 if (!a->q && a->esz == MO_64) {
9554 return false;
9555 }
9556 if (fp_access_check(s)) {
9557 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9558 }
9559 return true;
9560 }
9561
9562 TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs)
9563 TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg)
9564 TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not)
9565 TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt)
9566 TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit)
9567 TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0)
9568 TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0)
9569 TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0)
9570 TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0)
9571 TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0)
9572 TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16)
9573 TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32)
9574 TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe)
9575 TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte)
9576
9577 static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9578 {
9579 if (a->esz == MO_64) {
9580 return false;
9581 }
9582 if (fp_access_check(s)) {
9583 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9584 }
9585 return true;
9586 }
9587
9588 TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
9589 TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
9590 TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
9591 TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
9592 TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
9593 TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
9594 TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)
9595
9596 static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a,
9597 ArithOneOp * const fn[3])
9598 {
9599 if (a->esz == MO_64) {
9600 return false;
9601 }
9602 if (fp_access_check(s)) {
9603 TCGv_i64 t0 = tcg_temp_new_i64();
9604 TCGv_i64 t1 = tcg_temp_new_i64();
9605
9606 read_vec_element(s, t0, a->rn, 0, MO_64);
9607 read_vec_element(s, t1, a->rn, 1, MO_64);
9608 fn[a->esz](t0, t0);
9609 fn[a->esz](t1, t1);
9610 write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32);
9611 write_vec_element(s, t1, a->rd, a->q ? 3 : 1, MO_32);
9612 clear_vec_high(s, a->q, a->rd);
9613 }
9614 return true;
9615 }
9616
9617 static ArithOneOp * const f_scalar_xtn[] = {
9618 gen_helper_neon_narrow_u8,
9619 gen_helper_neon_narrow_u16,
9620 tcg_gen_ext32u_i64,
9621 };
9622 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn)
9623 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun)
9624 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn)
9625 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn)
9626
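/*
 * FCVTN-style narrowing helpers: each takes one 64-bit source lane and
 * produces a result of half the width, zero-extended into the low bits
 * of the output, so do_2misc_narrow_vector can place the two results
 * into the low or high half of the destination.
 */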
9627 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9628 {
9629 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9630 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9631 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9632 TCGv_i32 ahp = get_ahp_flag();
9633
9634 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n);
9635 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9636 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9637 tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16);
9638 tcg_gen_extu_i32_i64(d, tcg_lo);
9639 }
9640
9641 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n)
9642 {
9643 TCGv_i32 tmp = tcg_temp_new_i32();
9644 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9645
9646 gen_helper_vfp_fcvtsd(tmp, n, fpst);
9647 tcg_gen_extu_i32_i64(d, tmp);
9648 }
9649
9650 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
9651 {
9652 /*
9653 * 64 bit to 32 bit float conversion
9654 * with von Neumann rounding (round to odd)
9655 */
9656 TCGv_i32 tmp = tcg_temp_new_i32();
9657 gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
9658 tcg_gen_extu_i32_i64(d, tmp);
9659 }
9660
9661 static ArithOneOp * const f_vector_fcvtn[] = {
9662 NULL,
9663 gen_fcvtn_hs,
9664 gen_fcvtn_sd,
9665 };
9666 static ArithOneOp * const f_scalar_fcvtxn[] = {
9667 NULL,
9668 NULL,
9669 gen_fcvtxn_sd,
9670 };
9671 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn)
9672 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn)
9673
9674 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9675 {
9676 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9677 TCGv_i32 tmp = tcg_temp_new_i32();
9678 gen_helper_bfcvt_pair(tmp, n, fpst);
9679 tcg_gen_extu_i32_i64(d, tmp);
9680 }
9681
9682 static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n)
9683 {
9684 TCGv_ptr fpst = fpstatus_ptr(FPST_AH);
9685 TCGv_i32 tmp = tcg_temp_new_i32();
9686 gen_helper_bfcvt_pair(tmp, n, fpst);
9687 tcg_gen_extu_i32_i64(d, tmp);
9688 }
9689
9690 static ArithOneOp * const f_vector_bfcvtn[2][3] = {
9691 {
9692 NULL,
9693 gen_bfcvtn_hs,
9694 NULL,
9695 }, {
9696 NULL,
9697 gen_bfcvtn_ah_hs,
9698 NULL,
9699 }
9700 };
9701 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a,
9702 f_vector_bfcvtn[s->fpcr_ah])
9703
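/*
 * SHLL/SHLL2: widen each source element to double width and then shift
 * it left by the original element size in bits (8 << esz), so e.g. a
 * byte 0xAB becomes the halfword 0xAB00.
 */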
9704 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
9705 {
9706 static NeonGenWidenFn * const widenfns[3] = {
9707 gen_helper_neon_widen_u8,
9708 gen_helper_neon_widen_u16,
9709 tcg_gen_extu_i32_i64,
9710 };
9711 NeonGenWidenFn *widenfn;
9712 TCGv_i64 tcg_res[2];
9713 TCGv_i32 tcg_op;
9714 int part, pass;
9715
9716 if (a->esz == MO_64) {
9717 return false;
9718 }
9719 if (!fp_access_check(s)) {
9720 return true;
9721 }
9722
9723 tcg_op = tcg_temp_new_i32();
9724 widenfn = widenfns[a->esz];
9725 part = a->q ? 2 : 0;
9726
9727 for (pass = 0; pass < 2; pass++) {
9728 read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32);
9729 tcg_res[pass] = tcg_temp_new_i64();
9730 widenfn(tcg_res[pass], tcg_op);
9731 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz);
9732 }
9733
9734 for (pass = 0; pass < 2; pass++) {
9735 write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
9736 }
9737 return true;
9738 }
9739
9740 static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9741 {
9742 int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9743
9744 if (check <= 0) {
9745 return check == 0;
9746 }
9747
9748 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9749 return true;
9750 }
9751
9752 TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs)
9753 TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg)
9754
9755 static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
9756 const FPScalar1 *f, int rmode)
9757 {
9758 TCGv_i32 tcg_rmode = NULL;
9759 TCGv_ptr fpst;
9760 int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9761
9762 if (check <= 0) {
9763 return check == 0;
9764 }
9765
9766 fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9767 if (rmode >= 0) {
9768 tcg_rmode = gen_set_rmode(rmode, fpst);
9769 }
9770
9771 if (a->esz == MO_64) {
9772 TCGv_i64 t64 = tcg_temp_new_i64();
9773
9774 for (int pass = 0; pass < 2; ++pass) {
9775 read_vec_element(s, t64, a->rn, pass, MO_64);
9776 f->gen_d(t64, t64, fpst);
9777 write_vec_element(s, t64, a->rd, pass, MO_64);
9778 }
9779 } else {
9780 TCGv_i32 t32 = tcg_temp_new_i32();
9781 void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr)
9782 = (a->esz == MO_16 ? f->gen_h : f->gen_s);
9783
9784 for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) {
9785 read_vec_element_i32(s, t32, a->rn, pass, a->esz);
9786 gen(t32, t32, fpst);
9787 write_vec_element_i32(s, t32, a->rd, pass, a->esz);
9788 }
9789 }
9790 clear_vec_high(s, a->q, a->rd);
9791
9792 if (rmode >= 0) {
9793 gen_restore_rmode(tcg_rmode, fpst);
9794 }
9795 return true;
9796 }
9797
9798 TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1)
9799
9800 TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
9801 TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF)
9802 TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF)
9803 TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO)
9804 TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
9805 TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1)
9806 TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1)
9807
9808 TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a,
9809 &f_scalar_frint32, FPROUNDING_ZERO)
9810 TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1)
9811 TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
9812 &f_scalar_frint64, FPROUNDING_ZERO)
9813 TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)
9814
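/*
 * Two-operand gvec expansions that take an fp_status pointer. The
 * fns[] tables are indexed by esz - 1 (half/single/double); "data" is
 * the per-op immediate handed to the helper, e.g. a shift count or a
 * float_round_* value for the FCVT* group below.
 */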
9815 static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz,
9816 bool is_q, int rd, int rn, int data,
9817 gen_helper_gvec_2_ptr * const fns[3],
9818 ARMFPStatusFlavour fpsttype)
9819 {
9820 int check = fp_access_check_vector_hsd(s, is_q, esz);
9821 TCGv_ptr fpst;
9822
9823 if (check <= 0) {
9824 return check == 0;
9825 }
9826
9827 fpst = fpstatus_ptr(fpsttype);
9828 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
9829 vec_full_reg_offset(s, rn), fpst,
9830 is_q ? 16 : 8, vec_full_reg_size(s),
9831 data, fns[esz - 1]);
9832 return true;
9833 }
9834
9835 static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
9836 int rd, int rn, int data,
9837 gen_helper_gvec_2_ptr * const fns[3])
9838 {
9839 return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
9840 esz == MO_16 ? FPST_A64_F16 :
9841 FPST_A64);
9842 }
9843
9844 static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q,
9845 int rd, int rn, int data,
9846 gen_helper_gvec_2_ptr * const fns[3])
9847 {
9848 return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data,
9849 fns, select_ah_fpst(s, esz));
9850 }
9851
9852 static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
9853 gen_helper_gvec_vcvt_sh,
9854 gen_helper_gvec_vcvt_sf,
9855 gen_helper_gvec_vcvt_sd,
9856 };
9857 TRANS(SCVTF_vi, do_gvec_op2_fpst,
9858 a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v)
9859 TRANS(SCVTF_vf, do_gvec_op2_fpst,
9860 a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v)
9861
9862 static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = {
9863 gen_helper_gvec_vcvt_uh,
9864 gen_helper_gvec_vcvt_uf,
9865 gen_helper_gvec_vcvt_ud,
9866 };
9867 TRANS(UCVTF_vi, do_gvec_op2_fpst,
9868 a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v)
9869 TRANS(UCVTF_vf, do_gvec_op2_fpst,
9870 a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v)
9871
9872 static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = {
9873 gen_helper_gvec_vcvt_rz_hs,
9874 gen_helper_gvec_vcvt_rz_fs,
9875 gen_helper_gvec_vcvt_rz_ds,
9876 };
9877 TRANS(FCVTZS_vf, do_gvec_op2_fpst,
9878 a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf)
9879
9880 static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = {
9881 gen_helper_gvec_vcvt_rz_hu,
9882 gen_helper_gvec_vcvt_rz_fu,
9883 gen_helper_gvec_vcvt_rz_du,
9884 };
9885 TRANS(FCVTZU_vf, do_gvec_op2_fpst,
9886 a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf)
9887
9888 static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = {
9889 gen_helper_gvec_vcvt_rm_sh,
9890 gen_helper_gvec_vcvt_rm_ss,
9891 gen_helper_gvec_vcvt_rm_sd,
9892 };
9893
9894 static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = {
9895 gen_helper_gvec_vcvt_rm_uh,
9896 gen_helper_gvec_vcvt_rm_us,
9897 gen_helper_gvec_vcvt_rm_ud,
9898 };
9899
9900 TRANS(FCVTNS_vi, do_gvec_op2_fpst,
9901 a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi)
9902 TRANS(FCVTNU_vi, do_gvec_op2_fpst,
9903 a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi)
9904 TRANS(FCVTPS_vi, do_gvec_op2_fpst,
9905 a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi)
9906 TRANS(FCVTPU_vi, do_gvec_op2_fpst,
9907 a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi)
9908 TRANS(FCVTMS_vi, do_gvec_op2_fpst,
9909 a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi)
9910 TRANS(FCVTMU_vi, do_gvec_op2_fpst,
9911 a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi)
9912 TRANS(FCVTZS_vi, do_gvec_op2_fpst,
9913 a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi)
9914 TRANS(FCVTZU_vi, do_gvec_op2_fpst,
9915 a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi)
9916 TRANS(FCVTAS_vi, do_gvec_op2_fpst,
9917 a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi)
9918 TRANS(FCVTAU_vi, do_gvec_op2_fpst,
9919 a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi)
9920
9921 static gen_helper_gvec_2_ptr * const f_fceq0[] = {
9922 gen_helper_gvec_fceq0_h,
9923 gen_helper_gvec_fceq0_s,
9924 gen_helper_gvec_fceq0_d,
9925 };
9926 TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0)
9927
9928 static gen_helper_gvec_2_ptr * const f_fcgt0[] = {
9929 gen_helper_gvec_fcgt0_h,
9930 gen_helper_gvec_fcgt0_s,
9931 gen_helper_gvec_fcgt0_d,
9932 };
9933 TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0)
9934
9935 static gen_helper_gvec_2_ptr * const f_fcge0[] = {
9936 gen_helper_gvec_fcge0_h,
9937 gen_helper_gvec_fcge0_s,
9938 gen_helper_gvec_fcge0_d,
9939 };
9940 TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0)
9941
9942 static gen_helper_gvec_2_ptr * const f_fclt0[] = {
9943 gen_helper_gvec_fclt0_h,
9944 gen_helper_gvec_fclt0_s,
9945 gen_helper_gvec_fclt0_d,
9946 };
9947 TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0)
9948
9949 static gen_helper_gvec_2_ptr * const f_fcle0[] = {
9950 gen_helper_gvec_fcle0_h,
9951 gen_helper_gvec_fcle0_s,
9952 gen_helper_gvec_fcle0_d,
9953 };
9954 TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0)
9955
9956 static gen_helper_gvec_2_ptr * const f_frecpe[] = {
9957 gen_helper_gvec_frecpe_h,
9958 gen_helper_gvec_frecpe_s,
9959 gen_helper_gvec_frecpe_d,
9960 };
9961 static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = {
9962 gen_helper_gvec_frecpe_h,
9963 gen_helper_gvec_frecpe_rpres_s,
9964 gen_helper_gvec_frecpe_d,
9965 };
9966 TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
9967 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe)
9968
9969 static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
9970 gen_helper_gvec_frsqrte_h,
9971 gen_helper_gvec_frsqrte_s,
9972 gen_helper_gvec_frsqrte_d,
9973 };
9974 static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = {
9975 gen_helper_gvec_frsqrte_h,
9976 gen_helper_gvec_frsqrte_rpres_s,
9977 gen_helper_gvec_frsqrte_d,
9978 };
9979 TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
9980 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte)
9981
9982 static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
9983 {
9984 /* Handle 2-reg-misc ops which are widening (so each size element
9985 * in the source becomes a 2*size element in the destination).
9986 * The only instruction like this is FCVTL.
9987 */
9988 int pass;
9989 TCGv_ptr fpst;
9990
9991 if (!fp_access_check(s)) {
9992 return true;
9993 }
9994
9995 if (a->esz == MO_64) {
9996 /* 32 -> 64 bit fp conversion */
9997 TCGv_i64 tcg_res[2];
9998 TCGv_i32 tcg_op = tcg_temp_new_i32();
9999 int srcelt = a->q ? 2 : 0;
10000
10001 fpst = fpstatus_ptr(FPST_A64);
10002
10003 for (pass = 0; pass < 2; pass++) {
10004 tcg_res[pass] = tcg_temp_new_i64();
10005 read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
10006 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst);
10007 }
10008 for (pass = 0; pass < 2; pass++) {
10009 write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
10010 }
10011 } else {
10012 /* 16 -> 32 bit fp conversion */
10013 int srcelt = a->q ? 4 : 0;
10014 TCGv_i32 tcg_res[4];
10015 TCGv_i32 ahp = get_ahp_flag();
10016
10017 fpst = fpstatus_ptr(FPST_A64_F16);
10018
10019 for (pass = 0; pass < 4; pass++) {
10020 tcg_res[pass] = tcg_temp_new_i32();
10021 read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
10022 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
10023 fpst, ahp);
10024 }
10025 for (pass = 0; pass < 4; pass++) {
10026 write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32);
10027 }
10028 }
10029 clear_vec_high(s, true, a->rd);
10030 return true;
10031 }
10032
10033 static bool trans_OK(DisasContext *s, arg_OK *a)
10034 {
10035 return true;
10036 }
10037
10038 static bool trans_FAIL(DisasContext *s, arg_OK *a)
10039 {
10040 s->is_nonstreaming = true;
10041 return true;
10042 }
10043
10044 /**
10045 * btype_destination_ok:
10046 * @insn: The instruction at the branch destination
10047 * @bt: SCTLR_ELx.BT
10048 * @btype: PSTATE.BTYPE, and is non-zero
10049 *
10050 * On a guarded page, there are a limited number of insns
10051 * that may be present at the branch target:
10052 * - branch target identifiers,
10053 * - paciasp, pacibsp,
10054 * - BRK insn
10055 * - HLT insn
10056 * Anything else causes a Branch Target Exception.
10057 *
10058 * Return true if the branch is compatible, false to raise BTITRAP.
10059 */
10060 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
10061 {
10062 if ((insn & 0xfffff01fu) == 0xd503201fu) {
10063 /* HINT space */
10064 switch (extract32(insn, 5, 7)) {
10065 case 0b011001: /* PACIASP */
10066 case 0b011011: /* PACIBSP */
10067 /*
10068 * If SCTLR_ELx.BT, then PACI*SP are not compatible
10069 * with btype == 3. Otherwise all btype are ok.
10070 */
10071 return !bt || btype != 3;
10072 case 0b100000: /* BTI */
10073 /* Not compatible with any btype. */
10074 return false;
10075 case 0b100010: /* BTI c */
10076 /* Not compatible with btype == 3 */
10077 return btype != 3;
10078 case 0b100100: /* BTI j */
10079 /* Not compatible with btype == 2 */
10080 return btype != 2;
10081 case 0b100110: /* BTI jc */
10082 /* Compatible with any btype. */
10083 return true;
10084 }
10085 } else {
10086 switch (insn & 0xffe0001fu) {
10087 case 0xd4200000u: /* BRK */
10088 case 0xd4400000u: /* HLT */
10089 /* Give priority to the breakpoint exception. */
10090 return true;
10091 }
10092 }
10093 return false;
10094 }
10095
10096 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
10097 CPUState *cpu)
10098 {
10099 DisasContext *dc = container_of(dcbase, DisasContext, base);
10100 CPUARMState *env = cpu_env(cpu);
10101 ARMCPU *arm_cpu = env_archcpu(env);
10102 CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
10103 int bound, core_mmu_idx;
10104
10105 dc->isar = &arm_cpu->isar;
10106 dc->condjmp = 0;
10107 dc->pc_save = dc->base.pc_first;
10108 dc->aarch64 = true;
10109 dc->thumb = false;
10110 dc->sctlr_b = 0;
10111 dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
10112 dc->condexec_mask = 0;
10113 dc->condexec_cond = 0;
10114 core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
10115 dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
10116 dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
10117 dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
10118 dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
10119 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10120 #if !defined(CONFIG_USER_ONLY)
10121 dc->user = (dc->current_el == 0);
10122 #endif
10123 dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
10124 dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
10125 dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
10126 dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
10127 dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
10128 dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
10129 dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
10130 dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
10131 dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
10132 dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
10133 dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
10134 dc->bt = EX_TBFLAG_A64(tb_flags, BT);
10135 dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
10136 dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
10137 dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
10138 dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
10139 dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
10140 dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
10141 dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
10142 dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
10143 dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
10144 dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
10145 dc->nv = EX_TBFLAG_A64(tb_flags, NV);
10146 dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
10147 dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
10148 dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
10149 dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
10150 dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
10151 dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
10152 dc->vec_len = 0;
10153 dc->vec_stride = 0;
10154 dc->cp_regs = arm_cpu->cp_regs;
10155 dc->features = env->features;
10156 dc->dcz_blocksize = arm_cpu->dcz_blocksize;
10157 dc->gm_blocksize = arm_cpu->gm_blocksize;
10158
10159 #ifdef CONFIG_USER_ONLY
10160 /* In sve_probe_page, we assume TBI is enabled. */
10161 tcg_debug_assert(dc->tbid & 1);
10162 #endif
10163
10164 dc->lse2 = dc_isar_feature(aa64_lse2, dc);
10165
10166 /* Single step state. The code-generation logic here is:
10167 * SS_ACTIVE == 0:
10168 * generate code with no special handling for single-stepping (except
10169 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10170 * this happens anyway because those changes are all system register or
10171 * PSTATE writes).
10172 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10173 * emit code for one insn
10174 * emit code to clear PSTATE.SS
10175 * emit code to generate software step exception for completed step
10176 * end TB (as usual for having generated an exception)
10177 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10178 * emit code to generate a software step exception
10179 * end the TB
10180 */
10181 dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
10182 dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
10183 dc->is_ldex = false;
10184
10185 /* Bound the number of insns to execute to those left on the page. */
10186 bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
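/*
 * For example, with 4KiB pages, if pc_first is 0xffc bytes into the
 * page then the OR gives ...fffffffc, whose negation is 4, so the
 * bound is a single insn; at a page boundary the bound is the full
 * 4096 / 4 = 1024 insns.
 */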
10187
10188 /* If architectural single step active, limit to 1. */
10189 if (dc->ss_active) {
10190 bound = 1;
10191 }
10192 dc->base.max_insns = MIN(dc->base.max_insns, bound);
10193 }
10194
10195 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
10196 {
10197 }
10198
10199 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
10200 {
10201 DisasContext *dc = container_of(dcbase, DisasContext, base);
10202 target_ulong pc_arg = dc->base.pc_next;
10203
10204 if (tb_cflags(dcbase->tb) & CF_PCREL) {
10205 pc_arg &= ~TARGET_PAGE_MASK;
10206 }
10207 tcg_gen_insn_start(pc_arg, 0, 0);
10208 dc->insn_start_updated = false;
10209 }
10210
10211 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10212 {
10213 DisasContext *s = container_of(dcbase, DisasContext, base);
10214 CPUARMState *env = cpu_env(cpu);
10215 uint64_t pc = s->base.pc_next;
10216 uint32_t insn;
10217
10218 /* Singlestep exceptions have the highest priority. */
10219 if (s->ss_active && !s->pstate_ss) {
10220 /* Singlestep state is Active-pending.
10221 * If we're in this state at the start of a TB then either
10222 * a) we just took an exception to an EL which is being debugged
10223 * and this is the first insn in the exception handler
10224 * b) debug exceptions were masked and we just unmasked them
10225 * without changing EL (eg by clearing PSTATE.D)
10226 * In either case we're going to take a swstep exception in the
10227 * "did not step an insn" case, and so the syndrome ISV and EX
10228 * bits should be zero.
10229 */
10230 assert(s->base.num_insns == 1);
10231 gen_swstep_exception(s, 0, 0);
10232 s->base.is_jmp = DISAS_NORETURN;
10233 s->base.pc_next = pc + 4;
10234 return;
10235 }
10236
10237 if (pc & 3) {
10238 /*
10239 * PC alignment fault. This has priority over the instruction abort
10240 * that we would receive from a translation fault via arm_ldl_code.
10241 * This should only be possible after an indirect branch, at the
10242 * start of the TB.
10243 */
10244 assert(s->base.num_insns == 1);
10245 gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
10246 s->base.is_jmp = DISAS_NORETURN;
10247 s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
10248 return;
10249 }
10250
10251 s->pc_curr = pc;
10252 insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
10253 s->insn = insn;
10254 s->base.pc_next = pc + 4;
10255
10256 s->fp_access_checked = 0;
10257 s->sve_access_checked = 0;
10258
10259 if (s->pstate_il) {
10260 /*
10261 * Illegal execution state. This has priority over BTI
10262 * exceptions, but comes after instruction abort exceptions.
10263 */
10264 gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
10265 return;
10266 }
10267
10268 if (dc_isar_feature(aa64_bti, s)) {
10269 if (s->base.num_insns == 1) {
10270 /* First insn can have btype set to non-zero. */
10271 tcg_debug_assert(s->btype >= 0);
10272
10273 /*
10274 * Note that the Branch Target Exception has fairly high
10275 * priority -- below debugging exceptions but above most
10276 * everything else. This allows us to handle this now
10277 * instead of waiting until the insn is otherwise decoded.
10278 *
10279 * We can check all but the guarded page check here;
10280 * defer the latter to a helper.
10281 */
10282 if (s->btype != 0
10283 && !btype_destination_ok(insn, s->bt, s->btype)) {
10284 gen_helper_guarded_page_check(tcg_env);
10285 }
10286 } else {
10287 /* Not the first insn: btype must be 0. */
10288 tcg_debug_assert(s->btype == 0);
10289 }
10290 }
10291
10292 s->is_nonstreaming = false;
10293 if (s->sme_trap_nonstreaming) {
10294 disas_sme_fa64(s, insn);
10295 }
10296
10297 if (!disas_a64(s, insn) &&
10298 !disas_sme(s, insn) &&
10299 !disas_sve(s, insn)) {
10300 unallocated_encoding(s);
10301 }
10302
10303 /*
10304 * After execution of most insns, btype is reset to 0.
10305 * Note that we set btype == -1 when the insn sets btype.
10306 */
10307 if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
10308 reset_btype(s);
10309 }
10310 }
10311
10312 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
10313 {
10314 DisasContext *dc = container_of(dcbase, DisasContext, base);
10315
10316 if (unlikely(dc->ss_active)) {
10317 /* Note that this means single stepping WFI doesn't halt the CPU.
10318 * For conditional branch insns this is harmless unreachable code as
10319 * gen_goto_tb() has already handled emitting the debug exception
10320 * (and thus a tb-jump is not possible when singlestepping).
10321 */
10322 switch (dc->base.is_jmp) {
10323 default:
10324 gen_a64_update_pc(dc, 4);
10325 /* fall through */
10326 case DISAS_EXIT:
10327 case DISAS_JUMP:
10328 gen_step_complete_exception(dc);
10329 break;
10330 case DISAS_NORETURN:
10331 break;
10332 }
10333 } else {
10334 switch (dc->base.is_jmp) {
10335 case DISAS_NEXT:
10336 case DISAS_TOO_MANY:
10337 gen_goto_tb(dc, 1, 4);
10338 break;
10339 default:
10340 case DISAS_UPDATE_EXIT:
10341 gen_a64_update_pc(dc, 4);
10342 /* fall through */
10343 case DISAS_EXIT:
10344 tcg_gen_exit_tb(NULL, 0);
10345 break;
10346 case DISAS_UPDATE_NOCHAIN:
10347 gen_a64_update_pc(dc, 4);
10348 /* fall through */
10349 case DISAS_JUMP:
10350 tcg_gen_lookup_and_goto_ptr();
10351 break;
10352 case DISAS_NORETURN:
10353 case DISAS_SWI:
10354 break;
10355 case DISAS_WFE:
10356 gen_a64_update_pc(dc, 4);
10357 gen_helper_wfe(tcg_env);
10358 break;
10359 case DISAS_YIELD:
10360 gen_a64_update_pc(dc, 4);
10361 gen_helper_yield(tcg_env);
10362 break;
10363 case DISAS_WFI:
10364 /*
10365 * This is a special case because we don't want to just halt
10366 * the CPU if trying to debug across a WFI.
10367 */
10368 gen_a64_update_pc(dc, 4);
10369 gen_helper_wfi(tcg_env, tcg_constant_i32(4));
10370 /*
10371 * The helper doesn't necessarily throw an exception, but we
10372 * must go back to the main loop to check for interrupts anyway.
10373 */
10374 tcg_gen_exit_tb(NULL, 0);
10375 break;
10376 }
10377 }
10378 }
10379
10380 const TranslatorOps aarch64_translator_ops = {
10381 .init_disas_context = aarch64_tr_init_disas_context,
10382 .tb_start = aarch64_tr_tb_start,
10383 .insn_start = aarch64_tr_insn_start,
10384 .translate_insn = aarch64_tr_translate_insn,
10385 .tb_stop = aarch64_tr_tb_stop,
10386 };
10387