/*
 *  ARM translation: M-profile MVE instructions
 *
 *  Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

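/*
 * The step immediate for VIDUP/VDDUP (and the wrapping variants) is
 * 1, 2, 4 or 8: the decoder passes us its log2 and we expand it here.
 */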
static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
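     *
     * Callers pass the OR of all the Q register numbers involved:
     * the OR is below 8 exactly when every register number has
     * bit 3 clear, i.e. is one of Q0..Q7.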
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

static void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}
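/*
 * Worked example: if execution was interrupted with ECI_A0A1A2B0, beats
 * A0..A2 of this insn and beat B0 of the following insn have already
 * run, so once this insn has been handled the pending state for the
 * next one is ECI_A0; every other non-NONE value collapses to ECI_NONE.
 */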

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
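/*
 * For example, VLDSTB_H covers the widening loads VLDRB.S16/VLDRB.U16
 * (a byte in memory, a halfword in the vector) and the narrowing store
 * VSTRB.16, all with memory element size MO_8.
 */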

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)
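/*
 * The B (bottom) forms write each narrowed result to the low half of
 * the double-width destination element and the T (top) forms to the
 * high half; the other half of each Qd element is left unchanged.
 */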

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }
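/*
 * For reference, DO_2OP(VADD, vadd) below expands to:
 *
 *   static bool trans_VADD(DisasContext *s, arg_2op *a)
 *   {
 *       static MVEGenTwoOpFn * const fns[] = {
 *           gen_helper_mve_vaddb, gen_helper_mve_vaddh,
 *           gen_helper_mve_vaddw, NULL,
 *       };
 *       return do_2op(s, a, fns[a->size]);
 *   }
 *
 * i.e. one trans function per insn, dispatching on the element size,
 * with NULL rejecting the reserved size 0b11.
 */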

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next.  The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution.  Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
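 * Hence trans_VADCI() and trans_VSBCI() below fall back to the
 * carry-from-FPSCR helpers when mve_skip_first_beat() is true: the
 * carry produced by the beats that already ran is held in FPSCR.C.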
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLAS, vmlas)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
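     *
     * (The first deposit below exploits the adjacency: writing
     * mask | (mask << 4) across both fields updates MASK01 and MASK23
     * in a single operation.)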
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                         \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_scalar(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)
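/*
 * VSHLL exists only for byte and halfword inputs (each element widens
 * to double its size), hence the two-entry function tables above.
 */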

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is VIDUP, VDDUP.
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)
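/*
 * Each DO_VCMP above instantiates both forms of the comparison: the
 * vector trans function and the corresponding _scalar one.
 */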

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)
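/*
 * VMAXAV and VMINAV are the "absolute" variants: the signed vector
 * elements are compared by absolute value against the unsigned value
 * in Rda.
 */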

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)