xref: /qemu/target/arm/tcg/translate-mve.c (revision 54dc78a901188d208a3dfedb0f98230043509120)
/*
 *  ARM translation: M-profile MVE instructions
 *
 *  Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}
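
/*
 * A worked example of the check above: for a three-operand insn the
 * caller passes qd | qn | qm. Any Qreg number of 8 or above has bit 3
 * set, so the OR of the operands is below 8 exactly when every operand
 * is in Q0..Q7.
 */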

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}
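
/*
 * For reference: PSR.ECI records which beats have already been executed
 * when an insn is resumed after an exception, so e.g. ECI_A0A1 means
 * "beats 0 and 1 of this insn are already done"; ECI_A0A1A2B0 also
 * covers beat 0 of the *next* insn in an overlapping pair.
 */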

static void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}
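
/*
 * Example of the update above: if we entered this insn with
 * ECI_A0A1A2B0 then beat 0 of the following insn was also already
 * executed, so the next insn must start with ECI_A0; for every other
 * valid ECI value the next insn starts cleanly with ECI_NONE.
 */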

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}
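
/*
 * Addressing summary for do_ldst(), in the usual A-profile P/W terms:
 * P=1 W=0 accesses at Rn+offset with no writeback, P=1 W=1 is
 * pre-indexed (access at Rn+offset, then write it back), and P=0 W=1
 * is post-indexed (access at Rn, then write back Rn+offset). The A bit
 * selects whether the offset is added or subtracted.
 */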

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
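
/*
 * For example, VLDSTB_H covers VLDRB.S16, VLDRB.U16 and VSTRB.16: the
 * memory accesses are byte-sized (MSIZE is MO_8) but each element of
 * Qd is a halfword, sign- or zero-extended on load and truncated on
 * store.
 */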

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)
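
/*
 * In the narrowing moves above, the B ("bottom") forms write each
 * narrowed result to the least significant half of the corresponding
 * wider element of Qd, and the T ("top") forms to the most significant
 * half; the VQMOVN/VQMOVUN variants additionally saturate the value.
 */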

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next.  The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution.  Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}
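
/*
 * Example of the carry chaining described above: for VADC.I32 element
 * 0 of Qd is Qn[0] + Qm[0] + FPSCR.C, the carry out of that addition
 * becomes the carry in for Qn[1] + Qm[1], and so on up the vector; the
 * carry out of element 3 is written back to FPSCR.C.
 */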

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLAS, vmlas)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}
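
/*
 * Note on the register pair handling above: the 64-bit accumulator
 * travels as RdaHi:RdaLo, so the two 32-bit registers are concatenated
 * on the way in and split again with extrl/extrh on the way out. RdaLo
 * is encoded with bit 0 clear (only even register numbers are valid),
 * which is why only RdaHi needs the 13/15 check.
 */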

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}
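
/*
 * Concrete example of the ECI handling above: VPST with mask 0b0100
 * executed with ECI_NONE deposits 0b0100_0100 so that MASK01 and
 * MASK23 both become 0b0100; if PSR.ECI says beat 1 already ran, we
 * leave MASK01 alone and set only MASK23.
 */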

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}
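
/*
 * For example (per the AdvSIMDExpandImm pseudocode that
 * asimd_imm_const() implements): cmode 0b0000 with op 0 replicates the
 * 8-bit immediate into each 32-bit element as a VMOV, while cmode
 * 0b0001 with op 1 is VBIC; asimd_imm_const() hands back the inverted
 * immediate so the vandi helper produces the VBIC result.
 */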

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                         \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)
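
/*
 * So, for example, VSHRI.S16 Qd, Qm, #3 reaches the vshli_sh helper
 * with a shift count of -3, which the left-shift helper interprets as
 * a right shift by 3.
 */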

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_scalar(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}
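
/*
 * Worked example for VSHLC, ignoring predication: with imm == 8, Qd
 * becomes (Qd << 8) | (Rdm & 0xff) across the whole 128 bits, and the
 * 8 bits shifted out of the top of Qd are returned in Rdm.
 */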

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}
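
/*
 * For example, VIDUP.U16 Qd, Rn, #2 with Rn == 10 writes the sequence
 * 10, 12, 14, ... 24 into the eight halfword elements of Qd, and the
 * next value in the sequence, 26, is written back to Rn.
 */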

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is the VIDUP, VDDUP encoding.
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}
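
/*
 * Wrap example: VIWDUP.U8 Qd, Rn, Rm, #1 with Rn == 6 and Rm == 8
 * fills the sixteen byte elements with 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
 * 0, 1, 2, 3, 4, 5 (the count wraps to 0 when it reaches Rm), and the
 * next value, 6, is written back to Rn.
 */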

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}
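
/*
 * In both compare helpers above, a nonzero a->mask means the insn is
 * VPT rather than plain VCMP: the comparison writes VPR.P0 as usual,
 * and gen_vpst() then sets the mask fields so that the following (up
 * to four) insns are predicated, with the mask value encoding the
 * Then/Else pattern.
 */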

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}
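
/*
 * i.e. ignoring predication, VABAV computes (a sketch, not the actual
 * helper implementation):
 *     for (e = 0; e < elements; e++) {
 *         rda += qn[e] > qm[e] ? qn[e] - qm[e] : qm[e] - qn[e];
 *     }
 */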

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)