xref: /qemu/target/arm/tcg/translate-mve.c (revision 98e40fbd792e13e98abd7f3d17f18a24edea4984)
1 /*
2  *  ARM translation: M-profile MVE instructions
3  *
4  *  Copyright (c) 2021 Linaro, Ltd.
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "tcg/tcg-op.h"
22 #include "tcg/tcg-op-gvec.h"
23 #include "exec/exec-all.h"
24 #include "exec/gen-icount.h"
25 #include "translate.h"
26 #include "translate-a32.h"
27 
28 static inline int vidup_imm(DisasContext *s, int x)
29 {
30     return 1 << x;
31 }
32 
33 /* Include the generated decoder */
34 #include "decode-mve.c.inc"
35 
36 typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
37 typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
38 typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
39 typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
40 typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
41 typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
42 typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
43 typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
44 typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
45 typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
46 typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
47 typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
48 typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
49 typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
50 typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
51 typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
52 typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
53 
54 /* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
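/*
 * Illustration: mve_qreg_offset(3) is offsetof(CPUARMState, vfp.zregs[3].d[0]),
 * i.e. the start of Q3 within the register storage shared with the FP/Neon regs.
 */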
55 static inline long mve_qreg_offset(unsigned reg)
56 {
57     return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
58 }
59 
60 static TCGv_ptr mve_qreg_ptr(unsigned reg)
61 {
62     TCGv_ptr ret = tcg_temp_new_ptr();
63     tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
64     return ret;
65 }
66 
67 static bool mve_check_qreg_bank(DisasContext *s, int qmask)
68 {
69     /*
70      * Check whether Qregs are in range. For v8.1M only Q0..Q7
71      * are supported, see VFPSmallRegisterBank().
72      */
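    /*
     * Callers pass the OR of every Q register number the insn uses
     * (e.g. a->qd | a->qm), so the test below is true exactly when
     * all of those registers are within Q0..Q7.
     */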
73     return qmask < 8;
74 }
75 
76 bool mve_eci_check(DisasContext *s)
77 {
78     /*
79      * This is a beatwise insn: check that ECI is valid (not a
80      * reserved value) and note that we are handling it.
81      * Return true if OK, false if we generated an exception.
82      */
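    /*
     * Roughly: the ECI values name the beats already completed before
     * this (re)execution, e.g. ECI_A0A1 means beats 0 and 1 of this
     * insn were already done, and the B0 in ECI_A0A1A2B0 refers to the
     * first beat of the following insn.
     */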
83     s->eci_handled = true;
84     switch (s->eci) {
85     case ECI_NONE:
86     case ECI_A0:
87     case ECI_A0A1:
88     case ECI_A0A1A2:
89     case ECI_A0A1A2B0:
90         return true;
91     default:
92         /* Reserved value: INVSTATE UsageFault */
93         gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
94                            default_exception_el(s));
95         return false;
96     }
97 }
98 
99 void mve_update_eci(DisasContext *s)
100 {
101     /*
102      * The helper function will always update the CPUState field,
103      * so we only need to update the DisasContext field.
104      */
105     if (s->eci) {
106         s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
107     }
108 }
109 
110 void mve_update_and_store_eci(DisasContext *s)
111 {
112     /*
113      * For insns which don't call a helper function that will call
114      * mve_advance_vpt(), this version updates s->eci and also stores
115      * it out to the CPUState field.
116      */
117     if (s->eci) {
118         mve_update_eci(s);
119         store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
120     }
121 }
122 
123 static bool mve_skip_first_beat(DisasContext *s)
124 {
125     /* Return true if PSR.ECI says we must skip the first beat of this insn */
126     switch (s->eci) {
127     case ECI_NONE:
128         return false;
129     case ECI_A0:
130     case ECI_A0A1:
131     case ECI_A0A1A2:
132     case ECI_A0A1A2B0:
133         return true;
134     default:
135         g_assert_not_reached();
136     }
137 }
138 
139 static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
140                     unsigned msize)
141 {
142     TCGv_i32 addr;
143     uint32_t offset;
144     TCGv_ptr qreg;
145 
146     if (!dc_isar_feature(aa32_mve, s) ||
147         !mve_check_qreg_bank(s, a->qd) ||
148         !fn) {
149         return false;
150     }
151 
152     /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
153     if (a->rn == 15 || (a->rn == 13 && a->w)) {
154         return false;
155     }
156 
157     if (!mve_eci_check(s) || !vfp_access_check(s)) {
158         return true;
159     }
160 
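    /*
     * Worked example: a->imm == 2 with msize MO_16 gives a byte offset
     * of 4, negated when a->a is clear. Combined with the P and W bits
     * handled below this gives the usual offset (p=1,w=0), pre-indexed
     * (p=1,w=1) and post-indexed (p=0,w=1) addressing forms.
     */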
161     offset = a->imm << msize;
162     if (!a->a) {
163         offset = -offset;
164     }
165     addr = load_reg(s, a->rn);
166     if (a->p) {
167         tcg_gen_addi_i32(addr, addr, offset);
168     }
169 
170     qreg = mve_qreg_ptr(a->qd);
171     fn(cpu_env, qreg, addr);
172     tcg_temp_free_ptr(qreg);
173 
174     /*
175      * Writeback always happens after the last beat of the insn,
176      * regardless of predication
177      */
178     if (a->w) {
179         if (!a->p) {
180             tcg_gen_addi_i32(addr, addr, offset);
181         }
182         store_reg(s, a->rn, addr);
183     } else {
184         tcg_temp_free_i32(addr);
185     }
186     mve_update_eci(s);
187     return true;
188 }
189 
190 static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
191 {
192     static MVEGenLdStFn * const ldstfns[4][2] = {
193         { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
194         { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
195         { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
196         { NULL, NULL }
197     };
198     return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
199 }
200 
201 #define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
202     static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
203     {                                                           \
204         static MVEGenLdStFn * const ldstfns[2][2] = {           \
205             { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
206             { NULL, gen_helper_mve_##ULD },                     \
207         };                                                      \
208         return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
209     }
210 
211 DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
212 DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
213 DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
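/*
 * For instance, DO_VLDST_WIDE_NARROW(VLDSTB_H, ...) above expands to a
 * trans_VLDSTB_H() which picks vldrb_sh or vldrb_uh for loads (bytes in
 * memory widened to signed/unsigned halfwords in the register) and
 * vstrb_h for the narrowing store, all with an msize of MO_8.
 */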
214 
215 static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
216 {
217     TCGv_i32 addr;
218     TCGv_ptr qd, qm;
219 
220     if (!dc_isar_feature(aa32_mve, s) ||
221         !mve_check_qreg_bank(s, a->qd | a->qm) ||
222         !fn || a->rn == 15) {
223         /* Rn case is UNPREDICTABLE */
224         return false;
225     }
226 
227     if (!mve_eci_check(s) || !vfp_access_check(s)) {
228         return true;
229     }
230 
231     addr = load_reg(s, a->rn);
232 
233     qd = mve_qreg_ptr(a->qd);
234     qm = mve_qreg_ptr(a->qm);
235     fn(cpu_env, qd, qm, addr);
236     tcg_temp_free_ptr(qd);
237     tcg_temp_free_ptr(qm);
238     tcg_temp_free_i32(addr);
239     mve_update_eci(s);
240     return true;
241 }
242 
243 /*
244  * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
245  * sign-extended to halfword elements in register". _os_ indicates that
246  * the offsets in Qm should be scaled by the element size.
247  */
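/*
 * The trans functions below index their tables as
 * fns[a->os][a->msize][a->size]; for example a non-scaled gather load of
 * bytes sign-extended to halfword elements (os=0, msize=MO_8, size=MO_16)
 * picks vldrb_sg_sh.
 */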
248 /* This macro is just to make the arrays more compact in these functions */
249 #define F(N) gen_helper_mve_##N
250 
251 /* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
252 static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
253 {
254     static MVEGenLdStSGFn * const fns[2][4][4] = { {
255             { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
256             { NULL, NULL,           F(vldrh_sg_sw), NULL },
257             { NULL, NULL,           NULL,           NULL },
258             { NULL, NULL,           NULL,           NULL }
259         }, {
260             { NULL, NULL,              NULL,              NULL },
261             { NULL, NULL,              F(vldrh_sg_os_sw), NULL },
262             { NULL, NULL,              NULL,              NULL },
263             { NULL, NULL,              NULL,              NULL }
264         }
265     };
266     if (a->qd == a->qm) {
267         return false; /* UNPREDICTABLE */
268     }
269     return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
270 }
271 
272 static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
273 {
274     static MVEGenLdStSGFn * const fns[2][4][4] = { {
275             { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
276             { NULL,           F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
277             { NULL,           NULL,           F(vldrw_sg_uw), NULL },
278             { NULL,           NULL,           NULL,           F(vldrd_sg_ud) }
279         }, {
280             { NULL, NULL,              NULL,              NULL },
281             { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
282             { NULL, NULL,              F(vldrw_sg_os_uw), NULL },
283             { NULL, NULL,              NULL,              F(vldrd_sg_os_ud) }
284         }
285     };
286     if (a->qd == a->qm) {
287         return false; /* UNPREDICTABLE */
288     }
289     return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
290 }
291 
292 static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
293 {
294     static MVEGenLdStSGFn * const fns[2][4][4] = { {
295             { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
296             { NULL,           F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
297             { NULL,           NULL,           F(vstrw_sg_uw), NULL },
298             { NULL,           NULL,           NULL,           F(vstrd_sg_ud) }
299         }, {
300             { NULL, NULL,              NULL,              NULL },
301             { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
302             { NULL, NULL,              F(vstrw_sg_os_uw), NULL },
303             { NULL, NULL,              NULL,              F(vstrd_sg_os_ud) }
304         }
305     };
306     return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
307 }
308 
309 #undef F
310 
311 static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
312                            MVEGenLdStSGFn *fn, unsigned msize)
313 {
314     uint32_t offset;
315     TCGv_ptr qd, qm;
316 
317     if (!dc_isar_feature(aa32_mve, s) ||
318         !mve_check_qreg_bank(s, a->qd | a->qm) ||
319         !fn) {
320         return false;
321     }
322 
323     if (!mve_eci_check(s) || !vfp_access_check(s)) {
324         return true;
325     }
326 
327     offset = a->imm << msize;
328     if (!a->a) {
329         offset = -offset;
330     }
331 
332     qd = mve_qreg_ptr(a->qd);
333     qm = mve_qreg_ptr(a->qm);
334     fn(cpu_env, qd, qm, tcg_constant_i32(offset));
335     tcg_temp_free_ptr(qd);
336     tcg_temp_free_ptr(qm);
337     mve_update_eci(s);
338     return true;
339 }
340 
341 static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
342 {
343     static MVEGenLdStSGFn * const fns[] = {
344         gen_helper_mve_vldrw_sg_uw,
345         gen_helper_mve_vldrw_sg_wb_uw,
346     };
347     if (a->qd == a->qm) {
348         return false; /* UNPREDICTABLE */
349     }
350     return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
351 }
352 
353 static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
354 {
355     static MVEGenLdStSGFn * const fns[] = {
356         gen_helper_mve_vldrd_sg_ud,
357         gen_helper_mve_vldrd_sg_wb_ud,
358     };
359     if (a->qd == a->qm) {
360         return false; /* UNPREDICTABLE */
361     }
362     return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
363 }
364 
365 static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
366 {
367     static MVEGenLdStSGFn * const fns[] = {
368         gen_helper_mve_vstrw_sg_uw,
369         gen_helper_mve_vstrw_sg_wb_uw,
370     };
371     return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
372 }
373 
374 static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
375 {
376     static MVEGenLdStSGFn * const fns[] = {
377         gen_helper_mve_vstrd_sg_ud,
378         gen_helper_mve_vstrd_sg_wb_ud,
379     };
380     return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
381 }
382 
383 static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
384                         int addrinc)
385 {
386     TCGv_i32 rn;
387 
388     if (!dc_isar_feature(aa32_mve, s) ||
389         !mve_check_qreg_bank(s, a->qd) ||
390         !fn || (a->rn == 13 && a->w) || a->rn == 15) {
391         /* Variously UNPREDICTABLE or UNDEF or related-encoding */
392         return false;
393     }
394     if (!mve_eci_check(s) || !vfp_access_check(s)) {
395         return true;
396     }
397 
398     rn = load_reg(s, a->rn);
399     /*
400      * We pass the index of Qd, not a pointer, because the helper must
401      * access multiple Q registers starting at Qd and working up.
402      */
403     fn(cpu_env, tcg_constant_i32(a->qd), rn);
404 
405     if (a->w) {
406         tcg_gen_addi_i32(rn, rn, addrinc);
407         store_reg(s, a->rn, rn);
408     } else {
409         tcg_temp_free_i32(rn);
410     }
411     mve_update_and_store_eci(s);
412     return true;
413 }
414 
415 /* This macro is just to make the arrays more compact in these functions */
416 #define F(N) gen_helper_mve_##N
417 
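/*
 * VLD2n/VST2n move two Q registers (32 bytes) and VLD4n/VST4n move four
 * (64 bytes), which is why the callers below pass addrinc values of 32
 * and 64 and restrict a->qd so that Qd..Qd+1 (or Qd..Qd+3) stay within
 * Q0..Q7.
 */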
418 static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
419 {
420     static MVEGenLdStIlFn * const fns[4][4] = {
421         { F(vld20b), F(vld20h), F(vld20w), NULL, },
422         { F(vld21b), F(vld21h), F(vld21w), NULL, },
423         { NULL, NULL, NULL, NULL },
424         { NULL, NULL, NULL, NULL },
425     };
426     if (a->qd > 6) {
427         return false;
428     }
429     return do_vldst_il(s, a, fns[a->pat][a->size], 32);
430 }
431 
432 static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
433 {
434     static MVEGenLdStIlFn * const fns[4][4] = {
435         { F(vld40b), F(vld40h), F(vld40w), NULL, },
436         { F(vld41b), F(vld41h), F(vld41w), NULL, },
437         { F(vld42b), F(vld42h), F(vld42w), NULL, },
438         { F(vld43b), F(vld43h), F(vld43w), NULL, },
439     };
440     if (a->qd > 4) {
441         return false;
442     }
443     return do_vldst_il(s, a, fns[a->pat][a->size], 64);
444 }
445 
446 static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
447 {
448     static MVEGenLdStIlFn * const fns[4][4] = {
449         { F(vst20b), F(vst20h), F(vst20w), NULL, },
450         { F(vst21b), F(vst21h), F(vst21w), NULL, },
451         { NULL, NULL, NULL, NULL },
452         { NULL, NULL, NULL, NULL },
453     };
454     if (a->qd > 6) {
455         return false;
456     }
457     return do_vldst_il(s, a, fns[a->pat][a->size], 32);
458 }
459 
460 static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
461 {
462     static MVEGenLdStIlFn * const fns[4][4] = {
463         { F(vst40b), F(vst40h), F(vst40w), NULL, },
464         { F(vst41b), F(vst41h), F(vst41w), NULL, },
465         { F(vst42b), F(vst42h), F(vst42w), NULL, },
466         { F(vst43b), F(vst43h), F(vst43w), NULL, },
467     };
468     if (a->qd > 4) {
469         return false;
470     }
471     return do_vldst_il(s, a, fns[a->pat][a->size], 64);
472 }
473 
474 #undef F
475 
476 static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
477 {
478     TCGv_ptr qd;
479     TCGv_i32 rt;
480 
481     if (!dc_isar_feature(aa32_mve, s) ||
482         !mve_check_qreg_bank(s, a->qd)) {
483         return false;
484     }
485     if (a->rt == 13 || a->rt == 15) {
486         /* UNPREDICTABLE; we choose to UNDEF */
487         return false;
488     }
489     if (!mve_eci_check(s) || !vfp_access_check(s)) {
490         return true;
491     }
492 
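    /*
     * tcg_gen_dup_i32() below replicates the scalar across the 32-bit
     * value for sub-word element sizes (e.g. for MO_8, 0x000000AB
     * becomes 0xABABABAB) before the helper stores it to every lane of
     * Qd, subject to predication.
     */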
493     qd = mve_qreg_ptr(a->qd);
494     rt = load_reg(s, a->rt);
495     tcg_gen_dup_i32(a->size, rt, rt);
496     gen_helper_mve_vdup(cpu_env, qd, rt);
497     tcg_temp_free_ptr(qd);
498     tcg_temp_free_i32(rt);
499     mve_update_eci(s);
500     return true;
501 }
502 
503 static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
504 {
505     TCGv_ptr qd, qm;
506 
507     if (!dc_isar_feature(aa32_mve, s) ||
508         !mve_check_qreg_bank(s, a->qd | a->qm) ||
509         !fn) {
510         return false;
511     }
512 
513     if (!mve_eci_check(s) || !vfp_access_check(s)) {
514         return true;
515     }
516 
517     qd = mve_qreg_ptr(a->qd);
518     qm = mve_qreg_ptr(a->qm);
519     fn(cpu_env, qd, qm);
520     tcg_temp_free_ptr(qd);
521     tcg_temp_free_ptr(qm);
522     mve_update_eci(s);
523     return true;
524 }
525 
526 #define DO_1OP(INSN, FN)                                        \
527     static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
528     {                                                           \
529         static MVEGenOneOpFn * const fns[] = {                  \
530             gen_helper_mve_##FN##b,                             \
531             gen_helper_mve_##FN##h,                             \
532             gen_helper_mve_##FN##w,                             \
533             NULL,                                               \
534         };                                                      \
535         return do_1op(s, a, fns[a->size]);                      \
536     }
537 
538 DO_1OP(VCLZ, vclz)
539 DO_1OP(VCLS, vcls)
540 DO_1OP(VABS, vabs)
541 DO_1OP(VNEG, vneg)
542 DO_1OP(VQABS, vqabs)
543 DO_1OP(VQNEG, vqneg)
544 DO_1OP(VMAXA, vmaxa)
545 DO_1OP(VMINA, vmina)
546 
547 /*
548  * For simple float/int conversions we use the fixed-point
549  * conversion helpers with a zero shift count
550  */
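/*
 * E.g. an integer-to-float VCVT with no fixed-point part simply calls
 * the corresponding fixed-point helper with a shift count of 0, since a
 * fixed-point value with zero fraction bits is just an integer.
 */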
551 #define DO_VCVT(INSN, HFN, SFN)                                         \
552     static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
553     {                                                                   \
554         gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0));         \
555     }                                                                   \
556     static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
557     {                                                                   \
558         gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0));         \
559     }                                                                   \
560     static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
561     {                                                                   \
562         static MVEGenOneOpFn * const fns[] = {                          \
563             NULL,                                                       \
564             gen_##INSN##h,                                              \
565             gen_##INSN##s,                                              \
566             NULL,                                                       \
567         };                                                              \
568         if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
569             return false;                                               \
570         }                                                               \
571         return do_1op(s, a, fns[a->size]);                              \
572     }
573 
574 DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf)
575 DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf)
576 DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs)
577 DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu)
578 
579 static bool do_vcvt_rmode(DisasContext *s, arg_1op *a,
580                           enum arm_fprounding rmode, bool u)
581 {
582     /*
583      * Handle VCVT fp to int with specified rounding mode.
584      * This is a 1op fn but we must pass the rounding mode as
585      * an immediate to the helper.
586      */
587     TCGv_ptr qd, qm;
588     static MVEGenVCVTRmodeFn * const fns[4][2] = {
589         { NULL, NULL },
590         { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh },
591         { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us },
592         { NULL, NULL },
593     };
594     MVEGenVCVTRmodeFn *fn = fns[a->size][u];
595 
596     if (!dc_isar_feature(aa32_mve_fp, s) ||
597         !mve_check_qreg_bank(s, a->qd | a->qm) ||
598         !fn) {
599         return false;
600     }
601 
602     if (!mve_eci_check(s) || !vfp_access_check(s)) {
603         return true;
604     }
605 
606     qd = mve_qreg_ptr(a->qd);
607     qm = mve_qreg_ptr(a->qm);
608     fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode)));
609     tcg_temp_free_ptr(qd);
610     tcg_temp_free_ptr(qm);
611     mve_update_eci(s);
612     return true;
613 }
614 
615 #define DO_VCVT_RMODE(INSN, RMODE, U)                           \
616     static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
617     {                                                           \
618         return do_vcvt_rmode(s, a, RMODE, U);                   \
619     }                                                           \
620 
621 DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
622 DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true)
623 DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false)
624 DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true)
625 DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false)
626 DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
627 DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
628 DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)
629 
630 #define DO_VCVT_SH(INSN, FN)                                    \
631     static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
632     {                                                           \
633         if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
634             return false;                                       \
635         }                                                       \
636         return do_1op(s, a, gen_helper_mve_##FN);               \
637     }                                                           \
638 
639 DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
640 DO_VCVT_SH(VCVTT_SH, vcvtt_sh)
641 DO_VCVT_SH(VCVTB_HS, vcvtb_hs)
642 DO_VCVT_SH(VCVTT_HS, vcvtt_hs)
643 
644 #define DO_VRINT(INSN, RMODE)                                           \
645     static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
646     {                                                                   \
647         gen_helper_mve_vrint_rm_h(env, qd, qm,                          \
648                                   tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
649     }                                                                   \
650     static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
651     {                                                                   \
652         gen_helper_mve_vrint_rm_s(env, qd, qm,                          \
653                                   tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
654     }                                                                   \
655     static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
656     {                                                                   \
657         static MVEGenOneOpFn * const fns[] = {                          \
658             NULL,                                                       \
659             gen_##INSN##h,                                              \
660             gen_##INSN##s,                                              \
661             NULL,                                                       \
662         };                                                              \
663         if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
664             return false;                                               \
665         }                                                               \
666         return do_1op(s, a, fns[a->size]);                              \
667     }
668 
669 DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
670 DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
671 DO_VRINT(VRINTZ, FPROUNDING_ZERO)
672 DO_VRINT(VRINTM, FPROUNDING_NEGINF)
673 DO_VRINT(VRINTP, FPROUNDING_POSINF)
674 
675 static bool trans_VRINTX(DisasContext *s, arg_1op *a)
676 {
677     static MVEGenOneOpFn * const fns[] = {
678         NULL,
679         gen_helper_mve_vrintx_h,
680         gen_helper_mve_vrintx_s,
681         NULL,
682     };
683     if (!dc_isar_feature(aa32_mve_fp, s)) {
684         return false;
685     }
686     return do_1op(s, a, fns[a->size]);
687 }
688 
689 /* Narrowing moves: only size 0 and 1 are valid */
690 #define DO_VMOVN(INSN, FN) \
691     static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
692     {                                                           \
693         static MVEGenOneOpFn * const fns[] = {                  \
694             gen_helper_mve_##FN##b,                             \
695             gen_helper_mve_##FN##h,                             \
696             NULL,                                               \
697             NULL,                                               \
698         };                                                      \
699         return do_1op(s, a, fns[a->size]);                      \
700     }
701 
702 DO_VMOVN(VMOVNB, vmovnb)
703 DO_VMOVN(VMOVNT, vmovnt)
704 DO_VMOVN(VQMOVUNB, vqmovunb)
705 DO_VMOVN(VQMOVUNT, vqmovunt)
706 DO_VMOVN(VQMOVN_BS, vqmovnbs)
707 DO_VMOVN(VQMOVN_TS, vqmovnts)
708 DO_VMOVN(VQMOVN_BU, vqmovnbu)
709 DO_VMOVN(VQMOVN_TU, vqmovntu)
710 
711 static bool trans_VREV16(DisasContext *s, arg_1op *a)
712 {
713     static MVEGenOneOpFn * const fns[] = {
714         gen_helper_mve_vrev16b,
715         NULL,
716         NULL,
717         NULL,
718     };
719     return do_1op(s, a, fns[a->size]);
720 }
721 
722 static bool trans_VREV32(DisasContext *s, arg_1op *a)
723 {
724     static MVEGenOneOpFn * const fns[] = {
725         gen_helper_mve_vrev32b,
726         gen_helper_mve_vrev32h,
727         NULL,
728         NULL,
729     };
730     return do_1op(s, a, fns[a->size]);
731 }
732 
733 static bool trans_VREV64(DisasContext *s, arg_1op *a)
734 {
735     static MVEGenOneOpFn * const fns[] = {
736         gen_helper_mve_vrev64b,
737         gen_helper_mve_vrev64h,
738         gen_helper_mve_vrev64w,
739         NULL,
740     };
741     return do_1op(s, a, fns[a->size]);
742 }
743 
744 static bool trans_VMVN(DisasContext *s, arg_1op *a)
745 {
746     return do_1op(s, a, gen_helper_mve_vmvn);
747 }
748 
749 static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
750 {
751     static MVEGenOneOpFn * const fns[] = {
752         NULL,
753         gen_helper_mve_vfabsh,
754         gen_helper_mve_vfabss,
755         NULL,
756     };
757     if (!dc_isar_feature(aa32_mve_fp, s)) {
758         return false;
759     }
760     return do_1op(s, a, fns[a->size]);
761 }
762 
763 static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
764 {
765     static MVEGenOneOpFn * const fns[] = {
766         NULL,
767         gen_helper_mve_vfnegh,
768         gen_helper_mve_vfnegs,
769         NULL,
770     };
771     if (!dc_isar_feature(aa32_mve_fp, s)) {
772         return false;
773     }
774     return do_1op(s, a, fns[a->size]);
775 }
776 
777 static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
778 {
779     TCGv_ptr qd, qn, qm;
780 
781     if (!dc_isar_feature(aa32_mve, s) ||
782         !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
783         !fn) {
784         return false;
785     }
786     if (!mve_eci_check(s) || !vfp_access_check(s)) {
787         return true;
788     }
789 
790     qd = mve_qreg_ptr(a->qd);
791     qn = mve_qreg_ptr(a->qn);
792     qm = mve_qreg_ptr(a->qm);
793     fn(cpu_env, qd, qn, qm);
794     tcg_temp_free_ptr(qd);
795     tcg_temp_free_ptr(qn);
796     tcg_temp_free_ptr(qm);
797     mve_update_eci(s);
798     return true;
799 }
800 
801 #define DO_LOGIC(INSN, HELPER)                                  \
802     static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
803     {                                                           \
804         return do_2op(s, a, HELPER);                            \
805     }
806 
807 DO_LOGIC(VAND, gen_helper_mve_vand)
808 DO_LOGIC(VBIC, gen_helper_mve_vbic)
809 DO_LOGIC(VORR, gen_helper_mve_vorr)
810 DO_LOGIC(VORN, gen_helper_mve_vorn)
811 DO_LOGIC(VEOR, gen_helper_mve_veor)
812 
813 DO_LOGIC(VPSEL, gen_helper_mve_vpsel)
814 
815 #define DO_2OP(INSN, FN) \
816     static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
817     {                                                           \
818         static MVEGenTwoOpFn * const fns[] = {                  \
819             gen_helper_mve_##FN##b,                             \
820             gen_helper_mve_##FN##h,                             \
821             gen_helper_mve_##FN##w,                             \
822             NULL,                                               \
823         };                                                      \
824         return do_2op(s, a, fns[a->size]);                      \
825     }
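/*
 * For example DO_2OP(VADD, vadd) below expands to a trans_VADD() that
 * dispatches on the element size to gen_helper_mve_vaddb/vaddh/vaddw
 * (there is no 64-bit element form, hence the NULL entry).
 */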
826 
827 DO_2OP(VADD, vadd)
828 DO_2OP(VSUB, vsub)
829 DO_2OP(VMUL, vmul)
830 DO_2OP(VMULH_S, vmulhs)
831 DO_2OP(VMULH_U, vmulhu)
832 DO_2OP(VRMULH_S, vrmulhs)
833 DO_2OP(VRMULH_U, vrmulhu)
834 DO_2OP(VMAX_S, vmaxs)
835 DO_2OP(VMAX_U, vmaxu)
836 DO_2OP(VMIN_S, vmins)
837 DO_2OP(VMIN_U, vminu)
838 DO_2OP(VABD_S, vabds)
839 DO_2OP(VABD_U, vabdu)
840 DO_2OP(VHADD_S, vhadds)
841 DO_2OP(VHADD_U, vhaddu)
842 DO_2OP(VHSUB_S, vhsubs)
843 DO_2OP(VHSUB_U, vhsubu)
844 DO_2OP(VMULL_BS, vmullbs)
845 DO_2OP(VMULL_BU, vmullbu)
846 DO_2OP(VMULL_TS, vmullts)
847 DO_2OP(VMULL_TU, vmulltu)
848 DO_2OP(VQDMULH, vqdmulh)
849 DO_2OP(VQRDMULH, vqrdmulh)
850 DO_2OP(VQADD_S, vqadds)
851 DO_2OP(VQADD_U, vqaddu)
852 DO_2OP(VQSUB_S, vqsubs)
853 DO_2OP(VQSUB_U, vqsubu)
854 DO_2OP(VSHL_S, vshls)
855 DO_2OP(VSHL_U, vshlu)
856 DO_2OP(VRSHL_S, vrshls)
857 DO_2OP(VRSHL_U, vrshlu)
858 DO_2OP(VQSHL_S, vqshls)
859 DO_2OP(VQSHL_U, vqshlu)
860 DO_2OP(VQRSHL_S, vqrshls)
861 DO_2OP(VQRSHL_U, vqrshlu)
862 DO_2OP(VQDMLADH, vqdmladh)
863 DO_2OP(VQDMLADHX, vqdmladhx)
864 DO_2OP(VQRDMLADH, vqrdmladh)
865 DO_2OP(VQRDMLADHX, vqrdmladhx)
866 DO_2OP(VQDMLSDH, vqdmlsdh)
867 DO_2OP(VQDMLSDHX, vqdmlsdhx)
868 DO_2OP(VQRDMLSDH, vqrdmlsdh)
869 DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
870 DO_2OP(VRHADD_S, vrhadds)
871 DO_2OP(VRHADD_U, vrhaddu)
872 /*
873  * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
874  * so we can reuse the DO_2OP macro. (Our implementation calculates the
875  * "expected" results in this case.) Similarly for VHCADD.
876  */
877 DO_2OP(VCADD90, vcadd90)
878 DO_2OP(VCADD270, vcadd270)
879 DO_2OP(VHCADD90, vhcadd90)
880 DO_2OP(VHCADD270, vhcadd270)
881 
882 static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
883 {
884     static MVEGenTwoOpFn * const fns[] = {
885         NULL,
886         gen_helper_mve_vqdmullbh,
887         gen_helper_mve_vqdmullbw,
888         NULL,
889     };
890     if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
891         /* UNPREDICTABLE; we choose to undef */
892         return false;
893     }
894     return do_2op(s, a, fns[a->size]);
895 }
896 
897 static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
898 {
899     static MVEGenTwoOpFn * const fns[] = {
900         NULL,
901         gen_helper_mve_vqdmullth,
902         gen_helper_mve_vqdmulltw,
903         NULL,
904     };
905     if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
906         /* UNPREDICTABLE; we choose to undef */
907         return false;
908     }
909     return do_2op(s, a, fns[a->size]);
910 }
911 
912 static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
913 {
914     /*
915      * Note that a->size indicates the output size, ie VMULL.P8
916      * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
917      * is the 16x16->32 operation and a->size is MO_32.
918      */
919     static MVEGenTwoOpFn * const fns[] = {
920         NULL,
921         gen_helper_mve_vmullpbh,
922         gen_helper_mve_vmullpbw,
923         NULL,
924     };
925     return do_2op(s, a, fns[a->size]);
926 }
927 
928 static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
929 {
930     /* a->size is as for trans_VMULLP_B */
931     static MVEGenTwoOpFn * const fns[] = {
932         NULL,
933         gen_helper_mve_vmullpth,
934         gen_helper_mve_vmullptw,
935         NULL,
936     };
937     return do_2op(s, a, fns[a->size]);
938 }
939 
940 /*
941  * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
942  * of the 32-bit elements in each lane of the input vectors, where the
943  * carry-out of each add is the carry-in of the next.  The initial carry
944  * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
945  * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
946  * These insns are subject to beat-wise execution.  Partial execution
947  * of an I=1 (initial carry input fixed) insn which does not
948  * execute the first beat must start with the current FPSCR.NZCV
949  * value, not the fixed constant input.
950  */
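/*
 * Illustration: for VADCI (initial carry fixed at 0) with
 * Qn = {0xFFFFFFFF, 0, 0, 0} and Qm = {1, 0, 0, 0}, element 0 yields 0
 * with carry-out 1, element 1 then computes 0 + 0 + 1 = 1, and the
 * carry-out of the last element (0 in this example) is written back to
 * FPSCR.C.
 */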
951 static bool trans_VADC(DisasContext *s, arg_2op *a)
952 {
953     return do_2op(s, a, gen_helper_mve_vadc);
954 }
955 
956 static bool trans_VADCI(DisasContext *s, arg_2op *a)
957 {
958     if (mve_skip_first_beat(s)) {
959         return trans_VADC(s, a);
960     }
961     return do_2op(s, a, gen_helper_mve_vadci);
962 }
963 
964 static bool trans_VSBC(DisasContext *s, arg_2op *a)
965 {
966     return do_2op(s, a, gen_helper_mve_vsbc);
967 }
968 
969 static bool trans_VSBCI(DisasContext *s, arg_2op *a)
970 {
971     if (mve_skip_first_beat(s)) {
972         return trans_VSBC(s, a);
973     }
974     return do_2op(s, a, gen_helper_mve_vsbci);
975 }
976 
977 #define DO_2OP_FP(INSN, FN)                                     \
978     static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
979     {                                                           \
980         static MVEGenTwoOpFn * const fns[] = {                  \
981             NULL,                                               \
982             gen_helper_mve_##FN##h,                             \
983             gen_helper_mve_##FN##s,                             \
984             NULL,                                               \
985         };                                                      \
986         if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
987             return false;                                       \
988         }                                                       \
989         return do_2op(s, a, fns[a->size]);                      \
990     }
991 
992 DO_2OP_FP(VADD_fp, vfadd)
993 DO_2OP_FP(VSUB_fp, vfsub)
994 DO_2OP_FP(VMUL_fp, vfmul)
995 DO_2OP_FP(VABD_fp, vfabd)
996 DO_2OP_FP(VMAXNM, vmaxnm)
997 DO_2OP_FP(VMINNM, vminnm)
998 DO_2OP_FP(VCADD90_fp, vfcadd90)
999 DO_2OP_FP(VCADD270_fp, vfcadd270)
1000 DO_2OP_FP(VFMA, vfma)
1001 DO_2OP_FP(VFMS, vfms)
1002 DO_2OP_FP(VCMUL0, vcmul0)
1003 DO_2OP_FP(VCMUL90, vcmul90)
1004 DO_2OP_FP(VCMUL180, vcmul180)
1005 DO_2OP_FP(VCMUL270, vcmul270)
1006 DO_2OP_FP(VCMLA0, vcmla0)
1007 DO_2OP_FP(VCMLA90, vcmla90)
1008 DO_2OP_FP(VCMLA180, vcmla180)
1009 DO_2OP_FP(VCMLA270, vcmla270)
1010 DO_2OP_FP(VMAXNMA, vmaxnma)
1011 DO_2OP_FP(VMINNMA, vminnma)
1012 
1013 static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
1014                           MVEGenTwoOpScalarFn fn)
1015 {
1016     TCGv_ptr qd, qn;
1017     TCGv_i32 rm;
1018 
1019     if (!dc_isar_feature(aa32_mve, s) ||
1020         !mve_check_qreg_bank(s, a->qd | a->qn) ||
1021         !fn) {
1022         return false;
1023     }
1024     if (a->rm == 13 || a->rm == 15) {
1025         /* UNPREDICTABLE */
1026         return false;
1027     }
1028     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1029         return true;
1030     }
1031 
1032     qd = mve_qreg_ptr(a->qd);
1033     qn = mve_qreg_ptr(a->qn);
1034     rm = load_reg(s, a->rm);
1035     fn(cpu_env, qd, qn, rm);
1036     tcg_temp_free_i32(rm);
1037     tcg_temp_free_ptr(qd);
1038     tcg_temp_free_ptr(qn);
1039     mve_update_eci(s);
1040     return true;
1041 }
1042 
1043 #define DO_2OP_SCALAR(INSN, FN)                                 \
1044     static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
1045     {                                                           \
1046         static MVEGenTwoOpScalarFn * const fns[] = {            \
1047             gen_helper_mve_##FN##b,                             \
1048             gen_helper_mve_##FN##h,                             \
1049             gen_helper_mve_##FN##w,                             \
1050             NULL,                                               \
1051         };                                                      \
1052         return do_2op_scalar(s, a, fns[a->size]);               \
1053     }
1054 
1055 DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
1056 DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
1057 DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
1058 DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
1059 DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
1060 DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
1061 DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
1062 DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
1063 DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
1064 DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
1065 DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
1066 DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
1067 DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
1068 DO_2OP_SCALAR(VBRSR, vbrsr)
1069 DO_2OP_SCALAR(VMLA, vmla)
1070 DO_2OP_SCALAR(VMLAS, vmlas)
1071 DO_2OP_SCALAR(VQDMLAH, vqdmlah)
1072 DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
1073 DO_2OP_SCALAR(VQDMLASH, vqdmlash)
1074 DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)
1075 
1076 static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
1077 {
1078     static MVEGenTwoOpScalarFn * const fns[] = {
1079         NULL,
1080         gen_helper_mve_vqdmullb_scalarh,
1081         gen_helper_mve_vqdmullb_scalarw,
1082         NULL,
1083     };
1084     if (a->qd == a->qn && a->size == MO_32) {
1085         /* UNPREDICTABLE; we choose to undef */
1086         return false;
1087     }
1088     return do_2op_scalar(s, a, fns[a->size]);
1089 }
1090 
1091 static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
1092 {
1093     static MVEGenTwoOpScalarFn * const fns[] = {
1094         NULL,
1095         gen_helper_mve_vqdmullt_scalarh,
1096         gen_helper_mve_vqdmullt_scalarw,
1097         NULL,
1098     };
1099     if (a->qd == a->qn && a->size == MO_32) {
1100         /* UNPREDICTABLE; we choose to undef */
1101         return false;
1102     }
1103     return do_2op_scalar(s, a, fns[a->size]);
1104 }
1105 
1106 
1107 #define DO_2OP_FP_SCALAR(INSN, FN)                              \
1108     static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
1109     {                                                           \
1110         static MVEGenTwoOpScalarFn * const fns[] = {            \
1111             NULL,                                               \
1112             gen_helper_mve_##FN##h,                             \
1113             gen_helper_mve_##FN##s,                             \
1114             NULL,                                               \
1115         };                                                      \
1116         if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
1117             return false;                                       \
1118         }                                                       \
1119         return do_2op_scalar(s, a, fns[a->size]);               \
1120     }
1121 
1122 DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
1123 DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
1124 DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
1125 DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
1126 DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)
1127 
1128 static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
1129                              MVEGenLongDualAccOpFn *fn)
1130 {
1131     TCGv_ptr qn, qm;
1132     TCGv_i64 rda;
1133     TCGv_i32 rdalo, rdahi;
1134 
1135     if (!dc_isar_feature(aa32_mve, s) ||
1136         !mve_check_qreg_bank(s, a->qn | a->qm) ||
1137         !fn) {
1138         return false;
1139     }
1140     /*
1141      * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
1142      * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
1143      */
1144     if (a->rdahi == 13 || a->rdahi == 15) {
1145         return false;
1146     }
1147     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1148         return true;
1149     }
1150 
1151     qn = mve_qreg_ptr(a->qn);
1152     qm = mve_qreg_ptr(a->qm);
1153 
1154     /*
1155      * This insn is subject to beat-wise execution. Partial execution
1156      * of an A=0 (no-accumulate) insn which does not execute the first
1157      * beat must start with the current rda value, not 0.
1158      */
1159     if (a->a || mve_skip_first_beat(s)) {
1160         rda = tcg_temp_new_i64();
1161         rdalo = load_reg(s, a->rdalo);
1162         rdahi = load_reg(s, a->rdahi);
1163         tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
1164         tcg_temp_free_i32(rdalo);
1165         tcg_temp_free_i32(rdahi);
1166     } else {
1167         rda = tcg_const_i64(0);
1168     }
1169 
1170     fn(rda, cpu_env, qn, qm, rda);
1171     tcg_temp_free_ptr(qn);
1172     tcg_temp_free_ptr(qm);
1173 
1174     rdalo = tcg_temp_new_i32();
1175     rdahi = tcg_temp_new_i32();
1176     tcg_gen_extrl_i64_i32(rdalo, rda);
1177     tcg_gen_extrh_i64_i32(rdahi, rda);
1178     store_reg(s, a->rdalo, rdalo);
1179     store_reg(s, a->rdahi, rdahi);
1180     tcg_temp_free_i64(rda);
1181     mve_update_eci(s);
1182     return true;
1183 }
1184 
1185 static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
1186 {
1187     static MVEGenLongDualAccOpFn * const fns[4][2] = {
1188         { NULL, NULL },
1189         { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
1190         { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
1191         { NULL, NULL },
1192     };
1193     return do_long_dual_acc(s, a, fns[a->size][a->x]);
1194 }
1195 
1196 static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
1197 {
1198     static MVEGenLongDualAccOpFn * const fns[4][2] = {
1199         { NULL, NULL },
1200         { gen_helper_mve_vmlaldavuh, NULL },
1201         { gen_helper_mve_vmlaldavuw, NULL },
1202         { NULL, NULL },
1203     };
1204     return do_long_dual_acc(s, a, fns[a->size][a->x]);
1205 }
1206 
1207 static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
1208 {
1209     static MVEGenLongDualAccOpFn * const fns[4][2] = {
1210         { NULL, NULL },
1211         { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
1212         { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
1213         { NULL, NULL },
1214     };
1215     return do_long_dual_acc(s, a, fns[a->size][a->x]);
1216 }
1217 
1218 static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
1219 {
1220     static MVEGenLongDualAccOpFn * const fns[] = {
1221         gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
1222     };
1223     return do_long_dual_acc(s, a, fns[a->x]);
1224 }
1225 
1226 static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
1227 {
1228     static MVEGenLongDualAccOpFn * const fns[] = {
1229         gen_helper_mve_vrmlaldavhuw, NULL,
1230     };
1231     return do_long_dual_acc(s, a, fns[a->x]);
1232 }
1233 
1234 static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
1235 {
1236     static MVEGenLongDualAccOpFn * const fns[] = {
1237         gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
1238     };
1239     return do_long_dual_acc(s, a, fns[a->x]);
1240 }
1241 
1242 static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
1243 {
1244     TCGv_ptr qn, qm;
1245     TCGv_i32 rda;
1246 
1247     if (!dc_isar_feature(aa32_mve, s) ||
1248         !mve_check_qreg_bank(s, a->qn) ||
1249         !fn) {
1250         return false;
1251     }
1252     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1253         return true;
1254     }
1255 
1256     qn = mve_qreg_ptr(a->qn);
1257     qm = mve_qreg_ptr(a->qm);
1258 
1259     /*
1260      * This insn is subject to beat-wise execution. Partial execution
1261      * of an A=0 (no-accumulate) insn which does not execute the first
1262      * beat must start with the current rda value, not 0.
1263      */
1264     if (a->a || mve_skip_first_beat(s)) {
1265         rda = load_reg(s, a->rda);
1266     } else {
1267         rda = tcg_const_i32(0);
1268     }
1269 
1270     fn(rda, cpu_env, qn, qm, rda);
1271     store_reg(s, a->rda, rda);
1272     tcg_temp_free_ptr(qn);
1273     tcg_temp_free_ptr(qm);
1274 
1275     mve_update_eci(s);
1276     return true;
1277 }
1278 
1279 #define DO_DUAL_ACC(INSN, FN)                                           \
1280     static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
1281     {                                                                   \
1282         static MVEGenDualAccOpFn * const fns[4][2] = {                  \
1283             { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
1284             { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
1285             { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
1286             { NULL, NULL },                                             \
1287         };                                                              \
1288         return do_dual_acc(s, a, fns[a->size][a->x]);                   \
1289     }
1290 
1291 DO_DUAL_ACC(VMLADAV_S, vmladavs)
1292 DO_DUAL_ACC(VMLSDAV, vmlsdav)
1293 
1294 static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
1295 {
1296     static MVEGenDualAccOpFn * const fns[4][2] = {
1297         { gen_helper_mve_vmladavub, NULL },
1298         { gen_helper_mve_vmladavuh, NULL },
1299         { gen_helper_mve_vmladavuw, NULL },
1300         { NULL, NULL },
1301     };
1302     return do_dual_acc(s, a, fns[a->size][a->x]);
1303 }
1304 
1305 static void gen_vpst(DisasContext *s, uint32_t mask)
1306 {
1307     /*
1308      * Set the VPR mask fields. We take advantage of MASK01 and MASK23
1309      * being adjacent fields in the register.
1310      *
1311      * Updating the masks is not predicated, but it is subject to beat-wise
1312      * execution, and the mask is updated on the odd-numbered beats.
1313      * So if PSR.ECI says we should skip beat 1, we mustn't update the
1314      * 01 mask field.
1315      */
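    /*
     * E.g. a mask of 0x8 is deposited as 0x88 below so that MASK01 and
     * MASK23 both become 0x8; when beat 1 has already executed, only
     * the MASK23 copy is written.
     */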
1316     TCGv_i32 vpr = load_cpu_field(v7m.vpr);
1317     switch (s->eci) {
1318     case ECI_NONE:
1319     case ECI_A0:
1320         /* Update both 01 and 23 fields */
1321         tcg_gen_deposit_i32(vpr, vpr,
1322                             tcg_constant_i32(mask | (mask << 4)),
1323                             R_V7M_VPR_MASK01_SHIFT,
1324                             R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
1325         break;
1326     case ECI_A0A1:
1327     case ECI_A0A1A2:
1328     case ECI_A0A1A2B0:
1329         /* Update only the 23 mask field */
1330         tcg_gen_deposit_i32(vpr, vpr,
1331                             tcg_constant_i32(mask),
1332                             R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
1333         break;
1334     default:
1335         g_assert_not_reached();
1336     }
1337     store_cpu_field(vpr, v7m.vpr);
1338 }
1339 
1340 static bool trans_VPST(DisasContext *s, arg_VPST *a)
1341 {
1342     /* mask == 0 is a "related encoding" */
1343     if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
1344         return false;
1345     }
1346     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1347         return true;
1348     }
1349     gen_vpst(s, a->mask);
1350     mve_update_and_store_eci(s);
1351     return true;
1352 }
1353 
1354 static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
1355 {
1356     /*
1357      * Invert the predicate in VPR.P0. We have to call out to
1358      * a helper because this insn itself is beatwise and can
1359      * be predicated.
1360      */
1361     if (!dc_isar_feature(aa32_mve, s)) {
1362         return false;
1363     }
1364     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1365         return true;
1366     }
1367 
1368     gen_helper_mve_vpnot(cpu_env);
1369     mve_update_eci(s);
1370     return true;
1371 }
1372 
1373 static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
1374 {
1375     /* VADDV: vector add across vector */
1376     static MVEGenVADDVFn * const fns[4][2] = {
1377         { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
1378         { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
1379         { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
1380         { NULL, NULL }
1381     };
1382     TCGv_ptr qm;
1383     TCGv_i32 rda;
1384 
1385     if (!dc_isar_feature(aa32_mve, s) ||
1386         a->size == 3) {
1387         return false;
1388     }
1389     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1390         return true;
1391     }
1392 
1393     /*
1394      * This insn is subject to beat-wise execution. Partial execution
1395      * of an A=0 (no-accumulate) insn which does not execute the first
1396      * beat must start with the current value of Rda, not zero.
1397      */
1398     if (a->a || mve_skip_first_beat(s)) {
1399         /* Accumulate input from Rda */
1400         rda = load_reg(s, a->rda);
1401     } else {
1402         /* Accumulate starting at zero */
1403         rda = tcg_const_i32(0);
1404     }
1405 
1406     qm = mve_qreg_ptr(a->qm);
1407     fns[a->size][a->u](rda, cpu_env, qm, rda);
1408     store_reg(s, a->rda, rda);
1409     tcg_temp_free_ptr(qm);
1410 
1411     mve_update_eci(s);
1412     return true;
1413 }
1414 
1415 static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
1416 {
1417     /*
1418      * Vector Add Long Across Vector: accumulate the 32-bit
1419      * elements of the vector into a 64-bit result stored in
1420      * a pair of general-purpose registers.
1421      * No need to check Qm's bank: it is only 3 bits in decode.
1422      */
1423     TCGv_ptr qm;
1424     TCGv_i64 rda;
1425     TCGv_i32 rdalo, rdahi;
1426 
1427     if (!dc_isar_feature(aa32_mve, s)) {
1428         return false;
1429     }
1430     /*
1431      * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
1432      * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
1433      */
1434     if (a->rdahi == 13 || a->rdahi == 15) {
1435         return false;
1436     }
1437     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1438         return true;
1439     }
1440 
1441     /*
1442      * This insn is subject to beat-wise execution. Partial execution
1443      * of an A=0 (no-accumulate) insn which does not execute the first
1444      * beat must start with the current value of RdaHi:RdaLo, not zero.
1445      */
1446     if (a->a || mve_skip_first_beat(s)) {
1447         /* Accumulate input from RdaHi:RdaLo */
1448         rda = tcg_temp_new_i64();
1449         rdalo = load_reg(s, a->rdalo);
1450         rdahi = load_reg(s, a->rdahi);
1451         tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
1452         tcg_temp_free_i32(rdalo);
1453         tcg_temp_free_i32(rdahi);
1454     } else {
1455         /* Accumulate starting at zero */
1456         rda = tcg_const_i64(0);
1457     }
1458 
1459     qm = mve_qreg_ptr(a->qm);
1460     if (a->u) {
1461         gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
1462     } else {
1463         gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
1464     }
1465     tcg_temp_free_ptr(qm);
1466 
1467     rdalo = tcg_temp_new_i32();
1468     rdahi = tcg_temp_new_i32();
1469     tcg_gen_extrl_i64_i32(rdalo, rda);
1470     tcg_gen_extrh_i64_i32(rdahi, rda);
1471     store_reg(s, a->rdalo, rdalo);
1472     store_reg(s, a->rdahi, rdahi);
1473     tcg_temp_free_i64(rda);
1474     mve_update_eci(s);
1475     return true;
1476 }
1477 
1478 static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
1479 {
1480     TCGv_ptr qd;
1481     uint64_t imm;
1482 
1483     if (!dc_isar_feature(aa32_mve, s) ||
1484         !mve_check_qreg_bank(s, a->qd) ||
1485         !fn) {
1486         return false;
1487     }
1488     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1489         return true;
1490     }
1491 
1492     imm = asimd_imm_const(a->imm, a->cmode, a->op);
1493 
1494     qd = mve_qreg_ptr(a->qd);
1495     fn(cpu_env, qd, tcg_constant_i64(imm));
1496     tcg_temp_free_ptr(qd);
1497     mve_update_eci(s);
1498     return true;
1499 }
1500 
1501 static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
1502 {
1503     /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
1504     MVEGenOneOpImmFn *fn;
1505 
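    /*
     * For instance a VBIC.I32 with immediate 0xFF00 takes the op=1 path
     * below: asimd_imm_const() returns the inverted constant 0xFFFF00FF
     * and vandi ANDs it into each element, which clears exactly the
     * 0xFF00 bits. (Illustrative; the cmode/op encoding details live in
     * asimd_imm_const().)
     */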
1506     if ((a->cmode & 1) && a->cmode < 12) {
1507         if (a->op) {
1508             /*
1509              * For op=1, the immediate will be inverted by asimd_imm_const(),
1510              * so the VBIC becomes a logical AND operation.
1511              */
1512             fn = gen_helper_mve_vandi;
1513         } else {
1514             fn = gen_helper_mve_vorri;
1515         }
1516     } else {
1517         /* There is one unallocated cmode/op combination in this space */
1518         if (a->cmode == 15 && a->op == 1) {
1519             return false;
1520         }
1521         /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
1522         fn = gen_helper_mve_vmovi;
1523     }
1524     return do_1imm(s, a, fn);
1525 }
1526 
1527 static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
1528                       bool negateshift)
1529 {
1530     TCGv_ptr qd, qm;
1531     int shift = a->shift;
1532 
1533     if (!dc_isar_feature(aa32_mve, s) ||
1534         !mve_check_qreg_bank(s, a->qd | a->qm) ||
1535         !fn) {
1536         return false;
1537     }
1538     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1539         return true;
1540     }
1541 
1542     /*
1543      * When we handle a right shift insn using a left-shift helper
1544      * which permits a negative shift count to indicate a right-shift,
1545      * we must negate the shift count.
1546      */
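    /*
     * E.g. a signed VSHR by immediate 3 comes through here with
     * negateshift true (see the DO_2SHIFT uses below) and so calls the
     * vshli_s helper with a shift count of -3.
     */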
1547     if (negateshift) {
1548         shift = -shift;
1549     }
1550 
1551     qd = mve_qreg_ptr(a->qd);
1552     qm = mve_qreg_ptr(a->qm);
1553     fn(cpu_env, qd, qm, tcg_constant_i32(shift));
1554     tcg_temp_free_ptr(qd);
1555     tcg_temp_free_ptr(qm);
1556     mve_update_eci(s);
1557     return true;
1558 }
1559 
1560 #define DO_2SHIFT(INSN, FN, NEGATESHIFT)                         \
1561     static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
1562     {                                                           \
1563         static MVEGenTwoOpShiftFn * const fns[] = {             \
1564             gen_helper_mve_##FN##b,                             \
1565             gen_helper_mve_##FN##h,                             \
1566             gen_helper_mve_##FN##w,                             \
1567             NULL,                                               \
1568         };                                                      \
1569         return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
1570     }
1571 
1572 DO_2SHIFT(VSHLI, vshli_u, false)
1573 DO_2SHIFT(VQSHLI_S, vqshli_s, false)
1574 DO_2SHIFT(VQSHLI_U, vqshli_u, false)
1575 DO_2SHIFT(VQSHLUI, vqshlui_s, false)
1576 /* These right shifts use a left-shift helper with negated shift count */
1577 DO_2SHIFT(VSHRI_S, vshli_s, true)
1578 DO_2SHIFT(VSHRI_U, vshli_u, true)
1579 DO_2SHIFT(VRSHRI_S, vrshli_s, true)
1580 DO_2SHIFT(VRSHRI_U, vrshli_u, true)
1581 
1582 DO_2SHIFT(VSRI, vsri, false)
1583 DO_2SHIFT(VSLI, vsli, false)
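
/*
 * For reference, DO_2SHIFT(VSHLI, vshli_u, false) above expands to
 * approximately:
 *
 *     static bool trans_VSHLI(DisasContext *s, arg_2shift *a)
 *     {
 *         static MVEGenTwoOpShiftFn * const fns[] = {
 *             gen_helper_mve_vshli_ub,
 *             gen_helper_mve_vshli_uh,
 *             gen_helper_mve_vshli_uw,
 *             NULL,
 *         };
 *         return do_2shift(s, a, fns[a->size], false);
 *     }
 */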
1584 
1585 #define DO_2SHIFT_FP(INSN, FN)                                  \
1586     static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
1587     {                                                           \
1588         if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
1589             return false;                                       \
1590         }                                                       \
1591         return do_2shift(s, a, gen_helper_mve_##FN, false);     \
1592     }
1593 
1594 DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
1595 DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
1596 DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
1597 DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
1598 DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
1599 DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
1600 DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
1601 DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)
1602 
1603 static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
1604                              MVEGenTwoOpShiftFn *fn)
1605 {
1606     TCGv_ptr qda;
1607     TCGv_i32 rm;
1608 
1609     if (!dc_isar_feature(aa32_mve, s) ||
1610         !mve_check_qreg_bank(s, a->qda) ||
1611         a->rm == 13 || a->rm == 15 || !fn) {
1612         /* Rm cases are UNPREDICTABLE */
1613         return false;
1614     }
1615     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1616         return true;
1617     }
1618 
1619     qda = mve_qreg_ptr(a->qda);
1620     rm = load_reg(s, a->rm);
1621     fn(cpu_env, qda, qda, rm);
1622     tcg_temp_free_ptr(qda);
1623     tcg_temp_free_i32(rm);
1624     mve_update_eci(s);
1625     return true;
1626 }
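
/*
 * Note (illustration only): these by-scalar shifts are destructive,
 * e.g. VSHL.S16 Qda, Rm shifts every element of Qda in place, which is
 * why the call above passes the same Qreg pointer as both destination
 * and source:
 *
 *     fn(cpu_env, qda, qda, rm);
 */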
1627 
1628 #define DO_2SHIFT_SCALAR(INSN, FN)                                      \
1629     static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
1630     {                                                                   \
1631         static MVEGenTwoOpShiftFn * const fns[] = {                     \
1632             gen_helper_mve_##FN##b,                                     \
1633             gen_helper_mve_##FN##h,                                     \
1634             gen_helper_mve_##FN##w,                                     \
1635             NULL,                                                       \
1636         };                                                              \
1637         return do_2shift_scalar(s, a, fns[a->size]);                    \
1638     }
1639 
1640 DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
1641 DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
1642 DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
1643 DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
1644 DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
1645 DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
1646 DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
1647 DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)
1648 
1649 #define DO_VSHLL(INSN, FN)                                      \
1650     static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
1651     {                                                           \
1652         static MVEGenTwoOpShiftFn * const fns[] = {             \
1653             gen_helper_mve_##FN##b,                             \
1654             gen_helper_mve_##FN##h,                             \
1655         };                                                      \
1656         return do_2shift(s, a, fns[a->size], false);            \
1657     }
1658 
1659 DO_VSHLL(VSHLL_BS, vshllbs)
1660 DO_VSHLL(VSHLL_BU, vshllbu)
1661 DO_VSHLL(VSHLL_TS, vshllts)
1662 DO_VSHLL(VSHLL_TU, vshlltu)
1663 
1664 #define DO_2SHIFT_N(INSN, FN)                                   \
1665     static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
1666     {                                                           \
1667         static MVEGenTwoOpShiftFn * const fns[] = {             \
1668             gen_helper_mve_##FN##b,                             \
1669             gen_helper_mve_##FN##h,                             \
1670         };                                                      \
1671         return do_2shift(s, a, fns[a->size], false);            \
1672     }
1673 
1674 DO_2SHIFT_N(VSHRNB, vshrnb)
1675 DO_2SHIFT_N(VSHRNT, vshrnt)
1676 DO_2SHIFT_N(VRSHRNB, vrshrnb)
1677 DO_2SHIFT_N(VRSHRNT, vrshrnt)
1678 DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
1679 DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
1680 DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
1681 DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
1682 DO_2SHIFT_N(VQSHRUNB, vqshrunb)
1683 DO_2SHIFT_N(VQSHRUNT, vqshrunt)
1684 DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
1685 DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
1686 DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
1687 DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
1688 DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
1689 DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)
1690 
1691 static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
1692 {
1693     /*
1694      * Whole Vector Left Shift with Carry. The carry is taken
1695      * from a general purpose register and written back there.
1696      * An imm of 0 means "shift by 32".
1697      */
1698     TCGv_ptr qd;
1699     TCGv_i32 rdm;
1700 
1701     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
1702         return false;
1703     }
1704     if (a->rdm == 13 || a->rdm == 15) {
1705         /* CONSTRAINED UNPREDICTABLE: we UNDEF */
1706         return false;
1707     }
1708     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1709         return true;
1710     }
1711 
1712     qd = mve_qreg_ptr(a->qd);
1713     rdm = load_reg(s, a->rdm);
1714     gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
1715     store_reg(s, a->rdm, rdm);
1716     tcg_temp_free_ptr(qd);
1717     mve_update_eci(s);
1718     return true;
1719 }
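
/*
 * Behavioural sketch (illustration only; the authoritative semantics
 * are in the vshlc helper): treating Qd as a single 128-bit value and
 * with shift = imm (imm 0 encoding 32), the operation is roughly
 *
 *     Qd  = (Qd << shift) | (low 'shift' bits of old Rdm);
 *     Rdm = the 'shift' bits shifted out of the top of Qd;
 */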
1720 
1721 static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
1722 {
1723     TCGv_ptr qd;
1724     TCGv_i32 rn;
1725 
1726     /*
1727      * Vector increment/decrement and duplicate (VIDUP, VDDUP).
1728      * This fills the vector with elements of successively increasing
1729      * or decreasing values, starting from Rn.
1730      */
1731     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
1732         return false;
1733     }
1734     if (a->size == MO_64) {
1735         /* size 0b11 is another encoding */
1736         return false;
1737     }
1738     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1739         return true;
1740     }
1741 
1742     qd = mve_qreg_ptr(a->qd);
1743     rn = load_reg(s, a->rn);
1744     fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
1745     store_reg(s, a->rn, rn);
1746     tcg_temp_free_ptr(qd);
1747     mve_update_eci(s);
1748     return true;
1749 }
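
/*
 * Worked example (illustrative values only): VIDUP.U16 Q0, R1, #2 with
 * R1 initially 10 writes 10, 12, 14, ..., 24 into Q0's eight 16-bit
 * lanes and writes the next offset, 26, back to R1.
 */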
1750 
1751 static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
1752 {
1753     TCGv_ptr qd;
1754     TCGv_i32 rn, rm;
1755 
1756     /*
1757      * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
1758      * This fills the vector with elements of successively increasing
1759      * or decreasing values, starting from Rn. Rm specifies a point where
1760      * the count wraps back around to 0. The updated offset is written back
1761      * to Rn.
1762      */
1763     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
1764         return false;
1765     }
1766     if (!fn || a->rm == 13 || a->rm == 15) {
1767         /*
1768          * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
1769          * Rm == 15 is the VIDUP/VDDUP encoding.
1770          */
1771         return false;
1772     }
1773     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1774         return true;
1775     }
1776 
1777     qd = mve_qreg_ptr(a->qd);
1778     rn = load_reg(s, a->rn);
1779     rm = load_reg(s, a->rm);
1780     fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
1781     store_reg(s, a->rn, rn);
1782     tcg_temp_free_ptr(qd);
1783     tcg_temp_free_i32(rm);
1784     mve_update_eci(s);
1785     return true;
1786 }
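
/*
 * Worked example (illustrative values only): VIWDUP.U8 Q0, R1, R2, #1
 * with R1 = 6 and R2 = 8 fills Q0's byte lanes with 6, 7, 0, 1, 2, ...,
 * counting up by one and wrapping back to 0 whenever the count reaches
 * the limit in R2; the wrapped offset after the final element is
 * written back to R1.
 */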
1787 
1788 static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
1789 {
1790     static MVEGenVIDUPFn * const fns[] = {
1791         gen_helper_mve_vidupb,
1792         gen_helper_mve_viduph,
1793         gen_helper_mve_vidupw,
1794         NULL,
1795     };
1796     return do_vidup(s, a, fns[a->size]);
1797 }
1798 
1799 static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
1800 {
1801     static MVEGenVIDUPFn * const fns[] = {
1802         gen_helper_mve_vidupb,
1803         gen_helper_mve_viduph,
1804         gen_helper_mve_vidupw,
1805         NULL,
1806     };
1807     /* VDDUP is just like VIDUP but with a negative immediate */
1808     a->imm = -a->imm;
1809     return do_vidup(s, a, fns[a->size]);
1810 }
1811 
1812 static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
1813 {
1814     static MVEGenVIWDUPFn * const fns[] = {
1815         gen_helper_mve_viwdupb,
1816         gen_helper_mve_viwduph,
1817         gen_helper_mve_viwdupw,
1818         NULL,
1819     };
1820     return do_viwdup(s, a, fns[a->size]);
1821 }
1822 
1823 static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
1824 {
1825     static MVEGenVIWDUPFn * const fns[] = {
1826         gen_helper_mve_vdwdupb,
1827         gen_helper_mve_vdwduph,
1828         gen_helper_mve_vdwdupw,
1829         NULL,
1830     };
1831     return do_viwdup(s, a, fns[a->size]);
1832 }
1833 
1834 static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
1835 {
1836     TCGv_ptr qn, qm;
1837 
1838     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
1839         !fn) {
1840         return false;
1841     }
1842     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1843         return true;
1844     }
1845 
1846     qn = mve_qreg_ptr(a->qn);
1847     qm = mve_qreg_ptr(a->qm);
1848     fn(cpu_env, qn, qm);
1849     tcg_temp_free_ptr(qn);
1850     tcg_temp_free_ptr(qm);
1851     if (a->mask) {
1852         /* VPT */
1853         gen_vpst(s, a->mask);
1854     }
1855     mve_update_eci(s);
1856     return true;
1857 }
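
/*
 * Usage note (sketch): a plain vector VCMP arrives here with
 * a->mask == 0, so the helper only updates the predicate bits in
 * VPR.P0; the VPT form of the encoding carries a non-zero mask and
 * additionally calls gen_vpst() to install the mask for the following
 * VPT block.
 */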
1858 
1859 static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
1860                            MVEGenScalarCmpFn *fn)
1861 {
1862     TCGv_ptr qn;
1863     TCGv_i32 rm;
1864 
1865     if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
1866         return false;
1867     }
1868     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1869         return true;
1870     }
1871 
1872     qn = mve_qreg_ptr(a->qn);
1873     if (a->rm == 15) {
1874         /* Encoding Rm=0b1111 means "constant zero" */
1875         rm = tcg_constant_i32(0);
1876     } else {
1877         rm = load_reg(s, a->rm);
1878     }
1879     fn(cpu_env, qn, rm);
1880     tcg_temp_free_ptr(qn);
1881     tcg_temp_free_i32(rm);
1882     if (a->mask) {
1883         /* VPT */
1884         gen_vpst(s, a->mask);
1885     }
1886     mve_update_eci(s);
1887     return true;
1888 }
1889 
1890 #define DO_VCMP(INSN, FN)                                       \
1891     static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
1892     {                                                           \
1893         static MVEGenCmpFn * const fns[] = {                    \
1894             gen_helper_mve_##FN##b,                             \
1895             gen_helper_mve_##FN##h,                             \
1896             gen_helper_mve_##FN##w,                             \
1897             NULL,                                               \
1898         };                                                      \
1899         return do_vcmp(s, a, fns[a->size]);                     \
1900     }                                                           \
1901     static bool trans_##INSN##_scalar(DisasContext *s,          \
1902                                       arg_vcmp_scalar *a)       \
1903     {                                                           \
1904         static MVEGenScalarCmpFn * const fns[] = {              \
1905             gen_helper_mve_##FN##_scalarb,                      \
1906             gen_helper_mve_##FN##_scalarh,                      \
1907             gen_helper_mve_##FN##_scalarw,                      \
1908             NULL,                                               \
1909         };                                                      \
1910         return do_vcmp_scalar(s, a, fns[a->size]);              \
1911     }
1912 
1913 DO_VCMP(VCMPEQ, vcmpeq)
1914 DO_VCMP(VCMPNE, vcmpne)
1915 DO_VCMP(VCMPCS, vcmpcs)
1916 DO_VCMP(VCMPHI, vcmphi)
1917 DO_VCMP(VCMPGE, vcmpge)
1918 DO_VCMP(VCMPLT, vcmplt)
1919 DO_VCMP(VCMPGT, vcmpgt)
1920 DO_VCMP(VCMPLE, vcmple)
1921 
1922 #define DO_VCMP_FP(INSN, FN)                                    \
1923     static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
1924     {                                                           \
1925         static MVEGenCmpFn * const fns[] = {                    \
1926             NULL,                                               \
1927             gen_helper_mve_##FN##h,                             \
1928             gen_helper_mve_##FN##s,                             \
1929             NULL,                                               \
1930         };                                                      \
1931         if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
1932             return false;                                       \
1933         }                                                       \
1934         return do_vcmp(s, a, fns[a->size]);                     \
1935     }                                                           \
1936     static bool trans_##INSN##_scalar(DisasContext *s,          \
1937                                       arg_vcmp_scalar *a)       \
1938     {                                                           \
1939         static MVEGenScalarCmpFn * const fns[] = {              \
1940             NULL,                                               \
1941             gen_helper_mve_##FN##_scalarh,                      \
1942             gen_helper_mve_##FN##_scalars,                      \
1943             NULL,                                               \
1944         };                                                      \
1945         if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
1946             return false;                                       \
1947         }                                                       \
1948         return do_vcmp_scalar(s, a, fns[a->size]);              \
1949     }
1950 
1951 DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
1952 DO_VCMP_FP(VCMPNE_fp, vfcmpne)
1953 DO_VCMP_FP(VCMPGE_fp, vfcmpge)
1954 DO_VCMP_FP(VCMPLT_fp, vfcmplt)
1955 DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
1956 DO_VCMP_FP(VCMPLE_fp, vfcmple)
1957 
1958 static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
1959 {
1960     /*
1961      * MIN/MAX operations across a vector: compute the min or
1962      * max of the initial value in a general purpose register
1963      * and all the elements in the vector, and store it back
1964      * into the general purpose register.
1965      */
1966     TCGv_ptr qm;
1967     TCGv_i32 rda;
1968 
1969     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
1970         !fn || a->rda == 13 || a->rda == 15) {
1971         /* Rda cases are UNPREDICTABLE */
1972         return false;
1973     }
1974     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1975         return true;
1976     }
1977 
1978     qm = mve_qreg_ptr(a->qm);
1979     rda = load_reg(s, a->rda);
1980     fn(rda, cpu_env, qm, rda);
1981     store_reg(s, a->rda, rda);
1982     tcg_temp_free_ptr(qm);
1983     mve_update_eci(s);
1984     return true;
1985 }
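
/*
 * Worked example (illustration only): for VMAXV.S8 R0, Q1 the helper
 * computes max(R0, Q1[0], ..., Q1[15]) over signed bytes, which is why
 * Rda is loaded, passed in as the accumulator, and then stored back
 * above.
 */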
1986 
1987 #define DO_VMAXV(INSN, FN)                                      \
1988     static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
1989     {                                                           \
1990         static MVEGenVADDVFn * const fns[] = {                  \
1991             gen_helper_mve_##FN##b,                             \
1992             gen_helper_mve_##FN##h,                             \
1993             gen_helper_mve_##FN##w,                             \
1994             NULL,                                               \
1995         };                                                      \
1996         return do_vmaxv(s, a, fns[a->size]);                    \
1997     }
1998 
1999 DO_VMAXV(VMAXV_S, vmaxvs)
2000 DO_VMAXV(VMAXV_U, vmaxvu)
2001 DO_VMAXV(VMAXAV, vmaxav)
2002 DO_VMAXV(VMINV_S, vminvs)
2003 DO_VMAXV(VMINV_U, vminvu)
2004 DO_VMAXV(VMINAV, vminav)
2005 
2006 #define DO_VMAXV_FP(INSN, FN)                                   \
2007     static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
2008     {                                                           \
2009         static MVEGenVADDVFn * const fns[] = {                  \
2010             NULL,                                               \
2011             gen_helper_mve_##FN##h,                             \
2012             gen_helper_mve_##FN##s,                             \
2013             NULL,                                               \
2014         };                                                      \
2015         if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
2016             return false;                                       \
2017         }                                                       \
2018         return do_vmaxv(s, a, fns[a->size]);                    \
2019     }
2020 
2021 DO_VMAXV_FP(VMAXNMV, vmaxnmv)
2022 DO_VMAXV_FP(VMINNMV, vminnmv)
2023 DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
2024 DO_VMAXV_FP(VMINNMAV, vminnmav)
2025 
2026 static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
2027 {
2028     /* Absolute difference accumulated across vector */
2029     TCGv_ptr qn, qm;
2030     TCGv_i32 rda;
2031 
2032     if (!dc_isar_feature(aa32_mve, s) ||
2033         !mve_check_qreg_bank(s, a->qm | a->qn) ||
2034         !fn || a->rda == 13 || a->rda == 15) {
2035         /* Rda cases are UNPREDICTABLE */
2036         return false;
2037     }
2038     if (!mve_eci_check(s) || !vfp_access_check(s)) {
2039         return true;
2040     }
2041 
2042     qm = mve_qreg_ptr(a->qm);
2043     qn = mve_qreg_ptr(a->qn);
2044     rda = load_reg(s, a->rda);
2045     fn(rda, cpu_env, qn, qm, rda);
2046     store_reg(s, a->rda, rda);
2047     tcg_temp_free_ptr(qm);
2048     tcg_temp_free_ptr(qn);
2049     mve_update_eci(s);
2050     return true;
2051 }
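
/*
 * Behavioural sketch (illustration only): for VABAV.S16 R0, Q1, Q2 the
 * accumulation performed by the helper is roughly
 *
 *     for (e = 0; e < 8; e++) {
 *         R0 += abs(Q1[e] - Q2[e]);
 *     }
 *
 * i.e. the absolute differences of corresponding elements are summed
 * into the general purpose register.
 */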
2052 
2053 #define DO_VABAV(INSN, FN)                                      \
2054     static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
2055     {                                                           \
2056         static MVEGenVABAVFn * const fns[] = {                  \
2057             gen_helper_mve_##FN##b,                             \
2058             gen_helper_mve_##FN##h,                             \
2059             gen_helper_mve_##FN##w,                             \
2060             NULL,                                               \
2061         };                                                      \
2062         return do_vabav(s, a, fns[a->size]);                    \
2063     }
2064 
2065 DO_VABAV(VABAV_S, vabavs)
2066 DO_VABAV(VABAV_U, vabavu)
2067 
2068 static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
2069 {
2070     /*
2071      * VMOV two 32-bit vector lanes to two general-purpose registers.
2072      * This insn is not predicated but it is subject to beat-wise
2073      * execution if it is not in an IT block. For us this means
2074      * only that if PSR.ECI says we should not be executing the beat
2075      * corresponding to the lane of the vector register being accessed
2076      * then we should skip performing the move, and that we need to do
2077      * the usual check for bad ECI state and advance of ECI state.
2078      * (If PSR.ECI is non-zero then we cannot be in an IT block.)
2079      */
2080     TCGv_i32 tmp;
2081     int vd;
2082 
2083     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
2084         a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
2085         a->rt == a->rt2) {
2086         /* Rt/Rt2 cases are UNPREDICTABLE */
2087         return false;
2088     }
2089     if (!mve_eci_check(s) || !vfp_access_check(s)) {
2090         return true;
2091     }
2092 
2093     /* Convert Qreg index to Dreg for read_neon_element32() etc */
2094     vd = a->qd * 2;
2095 
2096     if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
2097         tmp = tcg_temp_new_i32();
2098         read_neon_element32(tmp, vd, a->idx, MO_32);
2099         store_reg(s, a->rt, tmp);
2100     }
2101     if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
2102         tmp = tcg_temp_new_i32();
2103         read_neon_element32(tmp, vd + 1, a->idx, MO_32);
2104         store_reg(s, a->rt2, tmp);
2105     }
2106 
2107     mve_update_and_store_eci(s);
2108     return true;
2109 }
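
/*
 * Lane mapping sketch (illustration only): since vd == 2 * qd, the two
 * 32-bit lanes read above correspond to Qd[idx] and Qd[idx + 2], so in
 * effect
 *
 *     Rt  = Qd[idx];
 *     Rt2 = Qd[idx + 2];
 */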
2110 
2111 static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
2112 {
2113     /*
2114      * VMOV two general-purpose registers to two 32-bit vector lanes.
2115      * This insn is not predicated but it is subject to beat-wise
2116      * execution if it is not in an IT block. For us this means
2117      * only that if PSR.ECI says we should not be executing the beat
2118      * corresponding to the lane of the vector register being accessed
2119      * then we should skip performing the move, and that we need to do
2120      * the usual check for bad ECI state and advance of ECI state.
2121      * (If PSR.ECI is non-zero then we cannot be in an IT block.)
2122      */
2123     TCGv_i32 tmp;
2124     int vd;
2125 
2126     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
2127         a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
2128         /* Rt/Rt2 cases are UNPREDICTABLE */
2129         return false;
2130     }
2131     if (!mve_eci_check(s) || !vfp_access_check(s)) {
2132         return true;
2133     }
2134 
2135     /* Convert Qreg idx to Dreg for read_neon_element32() etc */
2136     vd = a->qd * 2;
2137 
2138     if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
2139         tmp = load_reg(s, a->rt);
2140         write_neon_element32(tmp, vd, a->idx, MO_32);
2141         tcg_temp_free_i32(tmp);
2142     }
2143     if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
2144         tmp = load_reg(s, a->rt2);
2145         write_neon_element32(tmp, vd + 1, a->idx, MO_32);
2146         tcg_temp_free_i32(tmp);
2147     }
2148 
2149     mve_update_and_store_eci(s);
2150     return true;
2151 }
2152