xref: /qemu/target/arm/tcg/translate-mve.c (revision fac80f0856cc465b21e2e59a64146b3540e055db)
1 /*
2  *  ARM translation: M-profile MVE instructions
3  *
4  *  Copyright (c) 2021 Linaro, Ltd.
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "tcg/tcg-op.h"
22 #include "tcg/tcg-op-gvec.h"
23 #include "exec/exec-all.h"
24 #include "exec/gen-icount.h"
25 #include "translate.h"
26 #include "translate-a32.h"
27 
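/*
 * Decode helper used by the generated decoder for the VIDUP/VDDUP
 * family: the encoded immediate x selects an element step of 1 << x
 * (1, 2, 4 or 8 for the valid encodings).
 */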
28 static inline int vidup_imm(DisasContext *s, int x)
29 {
30     return 1 << x;
31 }
32 
33 /* Include the generated decoder */
34 #include "decode-mve.c.inc"
35 
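/*
 * Prototypes for the generated helper calls. Where a helper produces a
 * scalar result it is returned in the leading TCGv_i32/TCGv_i64
 * argument; the first TCGv_ptr argument is always cpu_env.
 */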
36 typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
37 typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
38 typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
39 typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
40 typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
41 typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
42 typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
43 typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
44 typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
45 typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
46 typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
47 typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
48 typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
49 typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
50 typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
51 
52 /* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
53 static inline long mve_qreg_offset(unsigned reg)
54 {
55     return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
56 }
57 
58 static TCGv_ptr mve_qreg_ptr(unsigned reg)
59 {
60     TCGv_ptr ret = tcg_temp_new_ptr();
61     tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
62     return ret;
63 }
64 
65 static bool mve_check_qreg_bank(DisasContext *s, int qmask)
66 {
67     /*
68      * Check whether Qregs are in range. For v8.1M only Q0..Q7
69      * are supported, see VFPSmallRegisterBank().
70      */
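    /*
     * Callers pass the OR of the register numbers they use; bit 3 of
     * that value is set exactly when at least one register is Q8 or
     * higher, so a single comparison checks all of them.
     */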
71     return qmask < 8;
72 }
73 
74 bool mve_eci_check(DisasContext *s)
75 {
76     /*
77      * This is a beatwise insn: check that ECI is valid (not a
78      * reserved value) and note that we are handling it.
79      * Return true if OK, false if we generated an exception.
80      */
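    /*
     * PSR.ECI records which beats of a partially-executed beatwise
     * insn (and possibly of the insn after it) have already completed,
     * so that execution can resume mid-insn after an exception.
     */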
81     s->eci_handled = true;
82     switch (s->eci) {
83     case ECI_NONE:
84     case ECI_A0:
85     case ECI_A0A1:
86     case ECI_A0A1A2:
87     case ECI_A0A1A2B0:
88         return true;
89     default:
90         /* Reserved value: INVSTATE UsageFault */
91         gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
92                            default_exception_el(s));
93         return false;
94     }
95 }
96 
97 void mve_update_eci(DisasContext *s)
98 {
99     /*
100      * The helper function will always update the CPUState field,
101      * so we only need to update the DisasContext field.
102      */
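    /*
     * For example, ECI_A0A1A2B0 means beats 0..2 of this insn plus
     * beat 0 of the next insn have already executed, so once this insn
     * finishes the state to record is ECI_A0 ("beat 0 of the current
     * insn already done"); every other valid value becomes ECI_NONE.
     */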
103     if (s->eci) {
104         s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
105     }
106 }
107 
108 void mve_update_and_store_eci(DisasContext *s)
109 {
110     /*
111      * For insns which don't call a helper function that will call
112      * mve_advance_vpt(), this version updates s->eci and also stores
113      * it out to the CPUState field.
114      */
115     if (s->eci) {
116         mve_update_eci(s);
117         store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
118     }
119 }
120 
121 static bool mve_skip_first_beat(DisasContext *s)
122 {
123     /* Return true if PSR.ECI says we must skip the first beat of this insn */
124     switch (s->eci) {
125     case ECI_NONE:
126         return false;
127     case ECI_A0:
128     case ECI_A0A1:
129     case ECI_A0A1A2:
130     case ECI_A0A1A2B0:
131         return true;
132     default:
133         g_assert_not_reached();
134     }
135 }
136 
137 static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
138                     unsigned msize)
139 {
140     TCGv_i32 addr;
141     uint32_t offset;
142     TCGv_ptr qreg;
143 
144     if (!dc_isar_feature(aa32_mve, s) ||
145         !mve_check_qreg_bank(s, a->qd) ||
146         !fn) {
147         return false;
148     }
149 
150     /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
151     if (a->rn == 15 || (a->rn == 13 && a->w)) {
152         return false;
153     }
154 
155     if (!mve_eci_check(s) || !vfp_access_check(s)) {
156         return true;
157     }
158 
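    /* The immediate offset is specified in units of the memory element size */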
159     offset = a->imm << msize;
160     if (!a->a) {
161         offset = -offset;
162     }
163     addr = load_reg(s, a->rn);
164     if (a->p) {
165         tcg_gen_addi_i32(addr, addr, offset);
166     }
167 
168     qreg = mve_qreg_ptr(a->qd);
169     fn(cpu_env, qreg, addr);
170     tcg_temp_free_ptr(qreg);
171 
172     /*
173      * Writeback always happens after the last beat of the insn,
174      * regardless of predication
175      */
176     if (a->w) {
177         if (!a->p) {
178             tcg_gen_addi_i32(addr, addr, offset);
179         }
180         store_reg(s, a->rn, addr);
181     } else {
182         tcg_temp_free_i32(addr);
183     }
184     mve_update_eci(s);
185     return true;
186 }
187 
188 static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
189 {
190     static MVEGenLdStFn * const ldstfns[4][2] = {
191         { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
192         { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
193         { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
194         { NULL, NULL }
195     };
196     return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
197 }
198 
199 #define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
200     static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
201     {                                                           \
202         static MVEGenLdStFn * const ldstfns[2][2] = {           \
203             { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
204             { NULL, gen_helper_mve_##ULD },                     \
205         };                                                      \
206         return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
207     }
208 
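/*
 * For these widening loads (memory element narrower than the vector
 * element) the ldstfns table above is indexed by a->u and a->l: the
 * loads have signed and unsigned forms, while the matching narrowing
 * stores simply truncate, so the "unsigned store" slot stays NULL.
 */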
209 DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
210 DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
211 DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
212 
213 static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
214 {
215     TCGv_i32 addr;
216     TCGv_ptr qd, qm;
217 
218     if (!dc_isar_feature(aa32_mve, s) ||
219         !mve_check_qreg_bank(s, a->qd | a->qm) ||
220         !fn || a->rn == 15) {
221         /* Rn == 15 is UNPREDICTABLE */
222         return false;
223     }
224 
225     if (!mve_eci_check(s) || !vfp_access_check(s)) {
226         return true;
227     }
228 
229     addr = load_reg(s, a->rn);
230 
231     qd = mve_qreg_ptr(a->qd);
232     qm = mve_qreg_ptr(a->qm);
233     fn(cpu_env, qd, qm, addr);
234     tcg_temp_free_ptr(qd);
235     tcg_temp_free_ptr(qm);
236     tcg_temp_free_i32(addr);
237     mve_update_eci(s);
238     return true;
239 }
240 
241 /*
242  * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
243  * sign-extended to halfword elements in register". _os_ indicates that
244  * the offsets in Qm should be scaled by the element size.
245  */
246 /* This macro is just to make the arrays more compact in these functions */
247 #define F(N) gen_helper_mve_##N
248 
249 /* VLDRB/VSTRB (i.e. a memory element size of 1 byte) with OS=1 is UNPREDICTABLE; we UNDEF */
250 static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
251 {
252     static MVEGenLdStSGFn * const fns[2][4][4] = { {
253             { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
254             { NULL, NULL,           F(vldrh_sg_sw), NULL },
255             { NULL, NULL,           NULL,           NULL },
256             { NULL, NULL,           NULL,           NULL }
257         }, {
258             { NULL, NULL,              NULL,              NULL },
259             { NULL, NULL,              F(vldrh_sg_os_sw), NULL },
260             { NULL, NULL,              NULL,              NULL },
261             { NULL, NULL,              NULL,              NULL }
262         }
263     };
264     if (a->qd == a->qm) {
265         return false; /* UNPREDICTABLE */
266     }
267     return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
268 }
269 
270 static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
271 {
272     static MVEGenLdStSGFn * const fns[2][4][4] = { {
273             { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
274             { NULL,           F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
275             { NULL,           NULL,           F(vldrw_sg_uw), NULL },
276             { NULL,           NULL,           NULL,           F(vldrd_sg_ud) }
277         }, {
278             { NULL, NULL,              NULL,              NULL },
279             { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
280             { NULL, NULL,              F(vldrw_sg_os_uw), NULL },
281             { NULL, NULL,              NULL,              F(vldrd_sg_os_ud) }
282         }
283     };
284     if (a->qd == a->qm) {
285         return false; /* UNPREDICTABLE */
286     }
287     return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
288 }
289 
290 static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
291 {
292     static MVEGenLdStSGFn * const fns[2][4][4] = { {
293             { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
294             { NULL,           F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
295             { NULL,           NULL,           F(vstrw_sg_uw), NULL },
296             { NULL,           NULL,           NULL,           F(vstrd_sg_ud) }
297         }, {
298             { NULL, NULL,              NULL,              NULL },
299             { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
300             { NULL, NULL,              F(vstrw_sg_os_uw), NULL },
301             { NULL, NULL,              NULL,              F(vstrd_sg_os_ud) }
302         }
303     };
304     return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
305 }
306 
307 #undef F
308 
309 static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
310                            MVEGenLdStSGFn *fn, unsigned msize)
311 {
312     uint32_t offset;
313     TCGv_ptr qd, qm;
314 
315     if (!dc_isar_feature(aa32_mve, s) ||
316         !mve_check_qreg_bank(s, a->qd | a->qm) ||
317         !fn) {
318         return false;
319     }
320 
321     if (!mve_eci_check(s) || !vfp_access_check(s)) {
322         return true;
323     }
324 
325     offset = a->imm << msize;
326     if (!a->a) {
327         offset = -offset;
328     }
329 
330     qd = mve_qreg_ptr(a->qd);
331     qm = mve_qreg_ptr(a->qm);
332     fn(cpu_env, qd, qm, tcg_constant_i32(offset));
333     tcg_temp_free_ptr(qd);
334     tcg_temp_free_ptr(qm);
335     mve_update_eci(s);
336     return true;
337 }
338 
339 static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
340 {
341     static MVEGenLdStSGFn * const fns[] = {
342         gen_helper_mve_vldrw_sg_uw,
343         gen_helper_mve_vldrw_sg_wb_uw,
344     };
345     if (a->qd == a->qm) {
346         return false; /* UNPREDICTABLE */
347     }
348     return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
349 }
350 
351 static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
352 {
353     static MVEGenLdStSGFn * const fns[] = {
354         gen_helper_mve_vldrd_sg_ud,
355         gen_helper_mve_vldrd_sg_wb_ud,
356     };
357     if (a->qd == a->qm) {
358         return false; /* UNPREDICTABLE */
359     }
360     return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
361 }
362 
363 static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
364 {
365     static MVEGenLdStSGFn * const fns[] = {
366         gen_helper_mve_vstrw_sg_uw,
367         gen_helper_mve_vstrw_sg_wb_uw,
368     };
369     return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
370 }
371 
372 static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
373 {
374     static MVEGenLdStSGFn * const fns[] = {
375         gen_helper_mve_vstrd_sg_ud,
376         gen_helper_mve_vstrd_sg_wb_ud,
377     };
378     return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
379 }
380 
381 static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
382 {
383     TCGv_ptr qd;
384     TCGv_i32 rt;
385 
386     if (!dc_isar_feature(aa32_mve, s) ||
387         !mve_check_qreg_bank(s, a->qd)) {
388         return false;
389     }
390     if (a->rt == 13 || a->rt == 15) {
391         /* UNPREDICTABLE; we choose to UNDEF */
392         return false;
393     }
394     if (!mve_eci_check(s) || !vfp_access_check(s)) {
395         return true;
396     }
397 
398     qd = mve_qreg_ptr(a->qd);
399     rt = load_reg(s, a->rt);
400     tcg_gen_dup_i32(a->size, rt, rt);
401     gen_helper_mve_vdup(cpu_env, qd, rt);
402     tcg_temp_free_ptr(qd);
403     tcg_temp_free_i32(rt);
404     mve_update_eci(s);
405     return true;
406 }
407 
408 static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
409 {
410     TCGv_ptr qd, qm;
411 
412     if (!dc_isar_feature(aa32_mve, s) ||
413         !mve_check_qreg_bank(s, a->qd | a->qm) ||
414         !fn) {
415         return false;
416     }
417 
418     if (!mve_eci_check(s) || !vfp_access_check(s)) {
419         return true;
420     }
421 
422     qd = mve_qreg_ptr(a->qd);
423     qm = mve_qreg_ptr(a->qm);
424     fn(cpu_env, qd, qm);
425     tcg_temp_free_ptr(qd);
426     tcg_temp_free_ptr(qm);
427     mve_update_eci(s);
428     return true;
429 }
430 
431 #define DO_1OP(INSN, FN)                                        \
432     static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
433     {                                                           \
434         static MVEGenOneOpFn * const fns[] = {                  \
435             gen_helper_mve_##FN##b,                             \
436             gen_helper_mve_##FN##h,                             \
437             gen_helper_mve_##FN##w,                             \
438             NULL,                                               \
439         };                                                      \
440         return do_1op(s, a, fns[a->size]);                      \
441     }
442 
443 DO_1OP(VCLZ, vclz)
444 DO_1OP(VCLS, vcls)
445 DO_1OP(VABS, vabs)
446 DO_1OP(VNEG, vneg)
447 DO_1OP(VQABS, vqabs)
448 DO_1OP(VQNEG, vqneg)
449 DO_1OP(VMAXA, vmaxa)
450 DO_1OP(VMINA, vmina)
451 
452 /* Narrowing moves: only size 0 and 1 are valid */
453 #define DO_VMOVN(INSN, FN) \
454     static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
455     {                                                           \
456         static MVEGenOneOpFn * const fns[] = {                  \
457             gen_helper_mve_##FN##b,                             \
458             gen_helper_mve_##FN##h,                             \
459             NULL,                                               \
460             NULL,                                               \
461         };                                                      \
462         return do_1op(s, a, fns[a->size]);                      \
463     }
464 
465 DO_VMOVN(VMOVNB, vmovnb)
466 DO_VMOVN(VMOVNT, vmovnt)
467 DO_VMOVN(VQMOVUNB, vqmovunb)
468 DO_VMOVN(VQMOVUNT, vqmovunt)
469 DO_VMOVN(VQMOVN_BS, vqmovnbs)
470 DO_VMOVN(VQMOVN_TS, vqmovnts)
471 DO_VMOVN(VQMOVN_BU, vqmovnbu)
472 DO_VMOVN(VQMOVN_TU, vqmovntu)
473 
474 static bool trans_VREV16(DisasContext *s, arg_1op *a)
475 {
476     static MVEGenOneOpFn * const fns[] = {
477         gen_helper_mve_vrev16b,
478         NULL,
479         NULL,
480         NULL,
481     };
482     return do_1op(s, a, fns[a->size]);
483 }
484 
485 static bool trans_VREV32(DisasContext *s, arg_1op *a)
486 {
487     static MVEGenOneOpFn * const fns[] = {
488         gen_helper_mve_vrev32b,
489         gen_helper_mve_vrev32h,
490         NULL,
491         NULL,
492     };
493     return do_1op(s, a, fns[a->size]);
494 }
495 
496 static bool trans_VREV64(DisasContext *s, arg_1op *a)
497 {
498     static MVEGenOneOpFn * const fns[] = {
499         gen_helper_mve_vrev64b,
500         gen_helper_mve_vrev64h,
501         gen_helper_mve_vrev64w,
502         NULL,
503     };
504     return do_1op(s, a, fns[a->size]);
505 }
506 
507 static bool trans_VMVN(DisasContext *s, arg_1op *a)
508 {
509     return do_1op(s, a, gen_helper_mve_vmvn);
510 }
511 
512 static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
513 {
514     static MVEGenOneOpFn * const fns[] = {
515         NULL,
516         gen_helper_mve_vfabsh,
517         gen_helper_mve_vfabss,
518         NULL,
519     };
520     if (!dc_isar_feature(aa32_mve_fp, s)) {
521         return false;
522     }
523     return do_1op(s, a, fns[a->size]);
524 }
525 
526 static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
527 {
528     static MVEGenOneOpFn * const fns[] = {
529         NULL,
530         gen_helper_mve_vfnegh,
531         gen_helper_mve_vfnegs,
532         NULL,
533     };
534     if (!dc_isar_feature(aa32_mve_fp, s)) {
535         return false;
536     }
537     return do_1op(s, a, fns[a->size]);
538 }
539 
540 static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
541 {
542     TCGv_ptr qd, qn, qm;
543 
544     if (!dc_isar_feature(aa32_mve, s) ||
545         !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
546         !fn) {
547         return false;
548     }
549     if (!mve_eci_check(s) || !vfp_access_check(s)) {
550         return true;
551     }
552 
553     qd = mve_qreg_ptr(a->qd);
554     qn = mve_qreg_ptr(a->qn);
555     qm = mve_qreg_ptr(a->qm);
556     fn(cpu_env, qd, qn, qm);
557     tcg_temp_free_ptr(qd);
558     tcg_temp_free_ptr(qn);
559     tcg_temp_free_ptr(qm);
560     mve_update_eci(s);
561     return true;
562 }
563 
564 #define DO_LOGIC(INSN, HELPER)                                  \
565     static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
566     {                                                           \
567         return do_2op(s, a, HELPER);                            \
568     }
569 
570 DO_LOGIC(VAND, gen_helper_mve_vand)
571 DO_LOGIC(VBIC, gen_helper_mve_vbic)
572 DO_LOGIC(VORR, gen_helper_mve_vorr)
573 DO_LOGIC(VORN, gen_helper_mve_vorn)
574 DO_LOGIC(VEOR, gen_helper_mve_veor)
575 
576 DO_LOGIC(VPSEL, gen_helper_mve_vpsel)
577 
578 #define DO_2OP(INSN, FN) \
579     static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
580     {                                                           \
581         static MVEGenTwoOpFn * const fns[] = {                  \
582             gen_helper_mve_##FN##b,                             \
583             gen_helper_mve_##FN##h,                             \
584             gen_helper_mve_##FN##w,                             \
585             NULL,                                               \
586         };                                                      \
587         return do_2op(s, a, fns[a->size]);                      \
588     }
589 
590 DO_2OP(VADD, vadd)
591 DO_2OP(VSUB, vsub)
592 DO_2OP(VMUL, vmul)
593 DO_2OP(VMULH_S, vmulhs)
594 DO_2OP(VMULH_U, vmulhu)
595 DO_2OP(VRMULH_S, vrmulhs)
596 DO_2OP(VRMULH_U, vrmulhu)
597 DO_2OP(VMAX_S, vmaxs)
598 DO_2OP(VMAX_U, vmaxu)
599 DO_2OP(VMIN_S, vmins)
600 DO_2OP(VMIN_U, vminu)
601 DO_2OP(VABD_S, vabds)
602 DO_2OP(VABD_U, vabdu)
603 DO_2OP(VHADD_S, vhadds)
604 DO_2OP(VHADD_U, vhaddu)
605 DO_2OP(VHSUB_S, vhsubs)
606 DO_2OP(VHSUB_U, vhsubu)
607 DO_2OP(VMULL_BS, vmullbs)
608 DO_2OP(VMULL_BU, vmullbu)
609 DO_2OP(VMULL_TS, vmullts)
610 DO_2OP(VMULL_TU, vmulltu)
611 DO_2OP(VQDMULH, vqdmulh)
612 DO_2OP(VQRDMULH, vqrdmulh)
613 DO_2OP(VQADD_S, vqadds)
614 DO_2OP(VQADD_U, vqaddu)
615 DO_2OP(VQSUB_S, vqsubs)
616 DO_2OP(VQSUB_U, vqsubu)
617 DO_2OP(VSHL_S, vshls)
618 DO_2OP(VSHL_U, vshlu)
619 DO_2OP(VRSHL_S, vrshls)
620 DO_2OP(VRSHL_U, vrshlu)
621 DO_2OP(VQSHL_S, vqshls)
622 DO_2OP(VQSHL_U, vqshlu)
623 DO_2OP(VQRSHL_S, vqrshls)
624 DO_2OP(VQRSHL_U, vqrshlu)
625 DO_2OP(VQDMLADH, vqdmladh)
626 DO_2OP(VQDMLADHX, vqdmladhx)
627 DO_2OP(VQRDMLADH, vqrdmladh)
628 DO_2OP(VQRDMLADHX, vqrdmladhx)
629 DO_2OP(VQDMLSDH, vqdmlsdh)
630 DO_2OP(VQDMLSDHX, vqdmlsdhx)
631 DO_2OP(VQRDMLSDH, vqrdmlsdh)
632 DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
633 DO_2OP(VRHADD_S, vrhadds)
634 DO_2OP(VRHADD_U, vrhaddu)
635 /*
636  * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
637  * it, so that we can reuse the DO_2OP macro. (Our implementation calculates the
638  * "expected" results in this case.) Similarly for VHCADD.
639  */
640 DO_2OP(VCADD90, vcadd90)
641 DO_2OP(VCADD270, vcadd270)
642 DO_2OP(VHCADD90, vhcadd90)
643 DO_2OP(VHCADD270, vhcadd270)
644 
645 static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
646 {
647     static MVEGenTwoOpFn * const fns[] = {
648         NULL,
649         gen_helper_mve_vqdmullbh,
650         gen_helper_mve_vqdmullbw,
651         NULL,
652     };
653     if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
654         /* UNPREDICTABLE; we choose to undef */
655         return false;
656     }
657     return do_2op(s, a, fns[a->size]);
658 }
659 
660 static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
661 {
662     static MVEGenTwoOpFn * const fns[] = {
663         NULL,
664         gen_helper_mve_vqdmullth,
665         gen_helper_mve_vqdmulltw,
666         NULL,
667     };
668     if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
669         /* UNPREDICTABLE; we choose to undef */
670         return false;
671     }
672     return do_2op(s, a, fns[a->size]);
673 }
674 
675 static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
676 {
677     /*
678      * Note that a->size indicates the output size, ie VMULL.P8
679      * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
680      * is the 16x16->32 operation and a->size is MO_32.
681      */
682     static MVEGenTwoOpFn * const fns[] = {
683         NULL,
684         gen_helper_mve_vmullpbh,
685         gen_helper_mve_vmullpbw,
686         NULL,
687     };
688     return do_2op(s, a, fns[a->size]);
689 }
690 
691 static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
692 {
693     /* a->size is as for trans_VMULLP_B */
694     static MVEGenTwoOpFn * const fns[] = {
695         NULL,
696         gen_helper_mve_vmullpth,
697         gen_helper_mve_vmullptw,
698         NULL,
699     };
700     return do_2op(s, a, fns[a->size]);
701 }
702 
703 /*
704  * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
705  * of the 32-bit elements in each lane of the input vectors, where the
706  * carry-out of each add is the carry-in of the next.  The initial carry
707  * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
708  * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
709  * These insns are subject to beat-wise execution.  Partial execution
710  * of an I=1 (initial carry input fixed) insn which does not
711  * execute the first beat must start with the current FPSCR.NZCV
712  * value, not the fixed constant input.
713  */
714 static bool trans_VADC(DisasContext *s, arg_2op *a)
715 {
716     return do_2op(s, a, gen_helper_mve_vadc);
717 }
718 
719 static bool trans_VADCI(DisasContext *s, arg_2op *a)
720 {
721     if (mve_skip_first_beat(s)) {
722         return trans_VADC(s, a);
723     }
724     return do_2op(s, a, gen_helper_mve_vadci);
725 }
726 
727 static bool trans_VSBC(DisasContext *s, arg_2op *a)
728 {
729     return do_2op(s, a, gen_helper_mve_vsbc);
730 }
731 
732 static bool trans_VSBCI(DisasContext *s, arg_2op *a)
733 {
734     if (mve_skip_first_beat(s)) {
735         return trans_VSBC(s, a);
736     }
737     return do_2op(s, a, gen_helper_mve_vsbci);
738 }
739 
740 static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
741                           MVEGenTwoOpScalarFn fn)
742 {
743     TCGv_ptr qd, qn;
744     TCGv_i32 rm;
745 
746     if (!dc_isar_feature(aa32_mve, s) ||
747         !mve_check_qreg_bank(s, a->qd | a->qn) ||
748         !fn) {
749         return false;
750     }
751     if (a->rm == 13 || a->rm == 15) {
752         /* UNPREDICTABLE */
753         return false;
754     }
755     if (!mve_eci_check(s) || !vfp_access_check(s)) {
756         return true;
757     }
758 
759     qd = mve_qreg_ptr(a->qd);
760     qn = mve_qreg_ptr(a->qn);
761     rm = load_reg(s, a->rm);
762     fn(cpu_env, qd, qn, rm);
763     tcg_temp_free_i32(rm);
764     tcg_temp_free_ptr(qd);
765     tcg_temp_free_ptr(qn);
766     mve_update_eci(s);
767     return true;
768 }
769 
770 #define DO_2OP_SCALAR(INSN, FN) \
771     static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
772     {                                                           \
773         static MVEGenTwoOpScalarFn * const fns[] = {            \
774             gen_helper_mve_##FN##b,                             \
775             gen_helper_mve_##FN##h,                             \
776             gen_helper_mve_##FN##w,                             \
777             NULL,                                               \
778         };                                                      \
779         return do_2op_scalar(s, a, fns[a->size]);               \
780     }
781 
782 DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
783 DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
784 DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
785 DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
786 DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
787 DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
788 DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
789 DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
790 DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
791 DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
792 DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
793 DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
794 DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
795 DO_2OP_SCALAR(VBRSR, vbrsr)
796 DO_2OP_SCALAR(VMLA, vmla)
797 DO_2OP_SCALAR(VMLAS, vmlas)
798 DO_2OP_SCALAR(VQDMLAH, vqdmlah)
799 DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
800 DO_2OP_SCALAR(VQDMLASH, vqdmlash)
801 DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)
802 
803 static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
804 {
805     static MVEGenTwoOpScalarFn * const fns[] = {
806         NULL,
807         gen_helper_mve_vqdmullb_scalarh,
808         gen_helper_mve_vqdmullb_scalarw,
809         NULL,
810     };
811     if (a->qd == a->qn && a->size == MO_32) {
812         /* UNPREDICTABLE; we choose to undef */
813         return false;
814     }
815     return do_2op_scalar(s, a, fns[a->size]);
816 }
817 
818 static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
819 {
820     static MVEGenTwoOpScalarFn * const fns[] = {
821         NULL,
822         gen_helper_mve_vqdmullt_scalarh,
823         gen_helper_mve_vqdmullt_scalarw,
824         NULL,
825     };
826     if (a->qd == a->qn && a->size == MO_32) {
827         /* UNPREDICTABLE; we choose to undef */
828         return false;
829     }
830     return do_2op_scalar(s, a, fns[a->size]);
831 }
832 
833 static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
834                              MVEGenLongDualAccOpFn *fn)
835 {
836     TCGv_ptr qn, qm;
837     TCGv_i64 rda;
838     TCGv_i32 rdalo, rdahi;
839 
840     if (!dc_isar_feature(aa32_mve, s) ||
841         !mve_check_qreg_bank(s, a->qn | a->qm) ||
842         !fn) {
843         return false;
844     }
845     /*
846      * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
847      * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
848      */
849     if (a->rdahi == 13 || a->rdahi == 15) {
850         return false;
851     }
852     if (!mve_eci_check(s) || !vfp_access_check(s)) {
853         return true;
854     }
855 
856     qn = mve_qreg_ptr(a->qn);
857     qm = mve_qreg_ptr(a->qm);
858 
859     /*
860      * This insn is subject to beat-wise execution. Partial execution
861      * of an A=0 (no-accumulate) insn which does not execute the first
862      * beat must start with the current rda value, not 0.
863      */
864     if (a->a || mve_skip_first_beat(s)) {
865         rda = tcg_temp_new_i64();
866         rdalo = load_reg(s, a->rdalo);
867         rdahi = load_reg(s, a->rdahi);
868         tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
869         tcg_temp_free_i32(rdalo);
870         tcg_temp_free_i32(rdahi);
871     } else {
872         rda = tcg_const_i64(0);
873     }
874 
875     fn(rda, cpu_env, qn, qm, rda);
876     tcg_temp_free_ptr(qn);
877     tcg_temp_free_ptr(qm);
878 
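    /* Split the 64-bit accumulator back into RdaLo (low half) and RdaHi */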
879     rdalo = tcg_temp_new_i32();
880     rdahi = tcg_temp_new_i32();
881     tcg_gen_extrl_i64_i32(rdalo, rda);
882     tcg_gen_extrh_i64_i32(rdahi, rda);
883     store_reg(s, a->rdalo, rdalo);
884     store_reg(s, a->rdahi, rdahi);
885     tcg_temp_free_i64(rda);
886     mve_update_eci(s);
887     return true;
888 }
889 
890 static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
891 {
892     static MVEGenLongDualAccOpFn * const fns[4][2] = {
893         { NULL, NULL },
894         { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
895         { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
896         { NULL, NULL },
897     };
898     return do_long_dual_acc(s, a, fns[a->size][a->x]);
899 }
900 
901 static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
902 {
903     static MVEGenLongDualAccOpFn * const fns[4][2] = {
904         { NULL, NULL },
905         { gen_helper_mve_vmlaldavuh, NULL },
906         { gen_helper_mve_vmlaldavuw, NULL },
907         { NULL, NULL },
908     };
909     return do_long_dual_acc(s, a, fns[a->size][a->x]);
910 }
911 
912 static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
913 {
914     static MVEGenLongDualAccOpFn * const fns[4][2] = {
915         { NULL, NULL },
916         { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
917         { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
918         { NULL, NULL },
919     };
920     return do_long_dual_acc(s, a, fns[a->size][a->x]);
921 }
922 
923 static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
924 {
925     static MVEGenLongDualAccOpFn * const fns[] = {
926         gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
927     };
928     return do_long_dual_acc(s, a, fns[a->x]);
929 }
930 
931 static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
932 {
933     static MVEGenLongDualAccOpFn * const fns[] = {
934         gen_helper_mve_vrmlaldavhuw, NULL,
935     };
936     return do_long_dual_acc(s, a, fns[a->x]);
937 }
938 
939 static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
940 {
941     static MVEGenLongDualAccOpFn * const fns[] = {
942         gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
943     };
944     return do_long_dual_acc(s, a, fns[a->x]);
945 }
946 
947 static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
948 {
949     TCGv_ptr qn, qm;
950     TCGv_i32 rda;
951 
952     if (!dc_isar_feature(aa32_mve, s) ||
953         !mve_check_qreg_bank(s, a->qn) ||
954         !fn) {
955         return false;
956     }
957     if (!mve_eci_check(s) || !vfp_access_check(s)) {
958         return true;
959     }
960 
961     qn = mve_qreg_ptr(a->qn);
962     qm = mve_qreg_ptr(a->qm);
963 
964     /*
965      * This insn is subject to beat-wise execution. Partial execution
966      * of an A=0 (no-accumulate) insn which does not execute the first
967      * beat must start with the current rda value, not 0.
968      */
969     if (a->a || mve_skip_first_beat(s)) {
970         rda = load_reg(s, a->rda);
971     } else {
972         rda = tcg_const_i32(0);
973     }
974 
975     fn(rda, cpu_env, qn, qm, rda);
976     store_reg(s, a->rda, rda);
977     tcg_temp_free_ptr(qn);
978     tcg_temp_free_ptr(qm);
979 
980     mve_update_eci(s);
981     return true;
982 }
983 
984 #define DO_DUAL_ACC(INSN, FN)                                           \
985     static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
986     {                                                                   \
987         static MVEGenDualAccOpFn * const fns[4][2] = {                  \
988             { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
989             { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
990             { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
991             { NULL, NULL },                                             \
992         };                                                              \
993         return do_dual_acc(s, a, fns[a->size][a->x]);                   \
994     }
995 
996 DO_DUAL_ACC(VMLADAV_S, vmladavs)
997 DO_DUAL_ACC(VMLSDAV, vmlsdav)
998 
999 static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
1000 {
1001     static MVEGenDualAccOpFn * const fns[4][2] = {
1002         { gen_helper_mve_vmladavub, NULL },
1003         { gen_helper_mve_vmladavuh, NULL },
1004         { gen_helper_mve_vmladavuw, NULL },
1005         { NULL, NULL },
1006     };
1007     return do_dual_acc(s, a, fns[a->size][a->x]);
1008 }
1009 
1010 static void gen_vpst(DisasContext *s, uint32_t mask)
1011 {
1012     /*
1013      * Set the VPR mask fields. We take advantage of MASK01 and MASK23
1014      * being adjacent fields in the register.
1015      *
1016      * Updating the masks is not predicated, but it is subject to beat-wise
1017      * execution, and the mask is updated on the odd-numbered beats.
1018      * So if PSR.ECI says we should skip beat 1, we mustn't update the
1019      * 01 mask field.
1020      */
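    /*
     * Because the fields are adjacent, a single deposit of
     * mask | (mask << 4) below writes the same 4-bit value into both
     * MASK01 and MASK23 at once.
     */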
1021     TCGv_i32 vpr = load_cpu_field(v7m.vpr);
1022     switch (s->eci) {
1023     case ECI_NONE:
1024     case ECI_A0:
1025         /* Update both 01 and 23 fields */
1026         tcg_gen_deposit_i32(vpr, vpr,
1027                             tcg_constant_i32(mask | (mask << 4)),
1028                             R_V7M_VPR_MASK01_SHIFT,
1029                             R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
1030         break;
1031     case ECI_A0A1:
1032     case ECI_A0A1A2:
1033     case ECI_A0A1A2B0:
1034         /* Update only the 23 mask field */
1035         tcg_gen_deposit_i32(vpr, vpr,
1036                             tcg_constant_i32(mask),
1037                             R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
1038         break;
1039     default:
1040         g_assert_not_reached();
1041     }
1042     store_cpu_field(vpr, v7m.vpr);
1043 }
1044 
1045 static bool trans_VPST(DisasContext *s, arg_VPST *a)
1046 {
1047     /* mask == 0 is a "related encoding" */
1048     if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
1049         return false;
1050     }
1051     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1052         return true;
1053     }
1054     gen_vpst(s, a->mask);
1055     mve_update_and_store_eci(s);
1056     return true;
1057 }
1058 
1059 static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
1060 {
1061     /*
1062      * Invert the predicate in VPR.P0. We have to call out to
1063      * a helper because this insn itself is beatwise and can
1064      * be predicated.
1065      */
1066     if (!dc_isar_feature(aa32_mve, s)) {
1067         return false;
1068     }
1069     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1070         return true;
1071     }
1072 
1073     gen_helper_mve_vpnot(cpu_env);
1074     mve_update_eci(s);
1075     return true;
1076 }
1077 
1078 static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
1079 {
1080     /* VADDV: vector add across vector */
1081     static MVEGenVADDVFn * const fns[4][2] = {
1082         { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
1083         { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
1084         { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
1085         { NULL, NULL }
1086     };
1087     TCGv_ptr qm;
1088     TCGv_i32 rda;
1089 
1090     if (!dc_isar_feature(aa32_mve, s) ||
1091         a->size == 3) {
1092         return false;
1093     }
1094     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1095         return true;
1096     }
1097 
1098     /*
1099      * This insn is subject to beat-wise execution. Partial execution
1100      * of an A=0 (no-accumulate) insn which does not execute the first
1101      * beat must start with the current value of Rda, not zero.
1102      */
1103     if (a->a || mve_skip_first_beat(s)) {
1104         /* Accumulate input from Rda */
1105         rda = load_reg(s, a->rda);
1106     } else {
1107         /* Accumulate starting at zero */
1108         rda = tcg_const_i32(0);
1109     }
1110 
1111     qm = mve_qreg_ptr(a->qm);
1112     fns[a->size][a->u](rda, cpu_env, qm, rda);
1113     store_reg(s, a->rda, rda);
1114     tcg_temp_free_ptr(qm);
1115 
1116     mve_update_eci(s);
1117     return true;
1118 }
1119 
1120 static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
1121 {
1122     /*
1123      * Vector Add Long Across Vector: accumulate the 32-bit
1124      * elements of the vector into a 64-bit result stored in
1125      * a pair of general-purpose registers.
1126      * No need to check Qm's bank: it is only 3 bits in decode.
1127      */
1128     TCGv_ptr qm;
1129     TCGv_i64 rda;
1130     TCGv_i32 rdalo, rdahi;
1131 
1132     if (!dc_isar_feature(aa32_mve, s)) {
1133         return false;
1134     }
1135     /*
1136      * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
1137      * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
1138      */
1139     if (a->rdahi == 13 || a->rdahi == 15) {
1140         return false;
1141     }
1142     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1143         return true;
1144     }
1145 
1146     /*
1147      * This insn is subject to beat-wise execution. Partial execution
1148      * of an A=0 (no-accumulate) insn which does not execute the first
1149      * beat must start with the current value of RdaHi:RdaLo, not zero.
1150      */
1151     if (a->a || mve_skip_first_beat(s)) {
1152         /* Accumulate input from RdaHi:RdaLo */
1153         rda = tcg_temp_new_i64();
1154         rdalo = load_reg(s, a->rdalo);
1155         rdahi = load_reg(s, a->rdahi);
1156         tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
1157         tcg_temp_free_i32(rdalo);
1158         tcg_temp_free_i32(rdahi);
1159     } else {
1160         /* Accumulate starting at zero */
1161         rda = tcg_const_i64(0);
1162     }
1163 
1164     qm = mve_qreg_ptr(a->qm);
1165     if (a->u) {
1166         gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
1167     } else {
1168         gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
1169     }
1170     tcg_temp_free_ptr(qm);
1171 
1172     rdalo = tcg_temp_new_i32();
1173     rdahi = tcg_temp_new_i32();
1174     tcg_gen_extrl_i64_i32(rdalo, rda);
1175     tcg_gen_extrh_i64_i32(rdahi, rda);
1176     store_reg(s, a->rdalo, rdalo);
1177     store_reg(s, a->rdahi, rdahi);
1178     tcg_temp_free_i64(rda);
1179     mve_update_eci(s);
1180     return true;
1181 }
1182 
1183 static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
1184 {
1185     TCGv_ptr qd;
1186     uint64_t imm;
1187 
1188     if (!dc_isar_feature(aa32_mve, s) ||
1189         !mve_check_qreg_bank(s, a->qd) ||
1190         !fn) {
1191         return false;
1192     }
1193     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1194         return true;
1195     }
1196 
1197     imm = asimd_imm_const(a->imm, a->cmode, a->op);
1198 
1199     qd = mve_qreg_ptr(a->qd);
1200     fn(cpu_env, qd, tcg_constant_i64(imm));
1201     tcg_temp_free_ptr(qd);
1202     mve_update_eci(s);
1203     return true;
1204 }
1205 
1206 static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
1207 {
1208     /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
1209     MVEGenOneOpImmFn *fn;
1210 
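    /*
     * cmode values 0bxxx1 below 12 (i.e. 1, 3, 5, 7, 9 and 11) are the
     * "OR/BIC with shifted immediate" encodings; everything else is a
     * VMOV or VMVN immediate.
     */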
1211     if ((a->cmode & 1) && a->cmode < 12) {
1212         if (a->op) {
1213             /*
1214              * For op=1, the immediate will be inverted by asimd_imm_const(),
1215              * so the VBIC becomes a logical AND operation.
1216              */
1217             fn = gen_helper_mve_vandi;
1218         } else {
1219             fn = gen_helper_mve_vorri;
1220         }
1221     } else {
1222         /* There is one unallocated cmode/op combination in this space */
1223         if (a->cmode == 15 && a->op == 1) {
1224             return false;
1225         }
1226         /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
1227         fn = gen_helper_mve_vmovi;
1228     }
1229     return do_1imm(s, a, fn);
1230 }
1231 
1232 static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
1233                       bool negateshift)
1234 {
1235     TCGv_ptr qd, qm;
1236     int shift = a->shift;
1237 
1238     if (!dc_isar_feature(aa32_mve, s) ||
1239         !mve_check_qreg_bank(s, a->qd | a->qm) ||
1240         !fn) {
1241         return false;
1242     }
1243     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1244         return true;
1245     }
1246 
1247     /*
1248      * When we handle a right shift insn using a left-shift helper
1249      * which permits a negative shift count to indicate a right-shift,
1250      * we must negate the shift count.
1251      */
1252     if (negateshift) {
1253         shift = -shift;
1254     }
1255 
1256     qd = mve_qreg_ptr(a->qd);
1257     qm = mve_qreg_ptr(a->qm);
1258     fn(cpu_env, qd, qm, tcg_constant_i32(shift));
1259     tcg_temp_free_ptr(qd);
1260     tcg_temp_free_ptr(qm);
1261     mve_update_eci(s);
1262     return true;
1263 }
1264 
1265 #define DO_2SHIFT(INSN, FN, NEGATESHIFT)                         \
1266     static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
1267     {                                                           \
1268         static MVEGenTwoOpShiftFn * const fns[] = {             \
1269             gen_helper_mve_##FN##b,                             \
1270             gen_helper_mve_##FN##h,                             \
1271             gen_helper_mve_##FN##w,                             \
1272             NULL,                                               \
1273         };                                                      \
1274         return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
1275     }
1276 
1277 DO_2SHIFT(VSHLI, vshli_u, false)
1278 DO_2SHIFT(VQSHLI_S, vqshli_s, false)
1279 DO_2SHIFT(VQSHLI_U, vqshli_u, false)
1280 DO_2SHIFT(VQSHLUI, vqshlui_s, false)
1281 /* These right shifts use a left-shift helper with negated shift count */
1282 DO_2SHIFT(VSHRI_S, vshli_s, true)
1283 DO_2SHIFT(VSHRI_U, vshli_u, true)
1284 DO_2SHIFT(VRSHRI_S, vrshli_s, true)
1285 DO_2SHIFT(VRSHRI_U, vrshli_u, true)
1286 
1287 DO_2SHIFT(VSRI, vsri, false)
1288 DO_2SHIFT(VSLI, vsli, false)
1289 
1290 static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
1291                              MVEGenTwoOpShiftFn *fn)
1292 {
1293     TCGv_ptr qda;
1294     TCGv_i32 rm;
1295 
1296     if (!dc_isar_feature(aa32_mve, s) ||
1297         !mve_check_qreg_bank(s, a->qda) ||
1298         a->rm == 13 || a->rm == 15 || !fn) {
1299         /* Rm cases are UNPREDICTABLE */
1300         return false;
1301     }
1302     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1303         return true;
1304     }
1305 
1306     qda = mve_qreg_ptr(a->qda);
1307     rm = load_reg(s, a->rm);
1308     fn(cpu_env, qda, qda, rm);
1309     tcg_temp_free_ptr(qda);
1310     tcg_temp_free_i32(rm);
1311     mve_update_eci(s);
1312     return true;
1313 }
1314 
1315 #define DO_2SHIFT_SCALAR(INSN, FN)                                      \
1316     static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
1317     {                                                                   \
1318         static MVEGenTwoOpShiftFn * const fns[] = {                     \
1319             gen_helper_mve_##FN##b,                                     \
1320             gen_helper_mve_##FN##h,                                     \
1321             gen_helper_mve_##FN##w,                                     \
1322             NULL,                                                       \
1323         };                                                              \
1324         return do_2shift_scalar(s, a, fns[a->size]);                    \
1325     }
1326 
1327 DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
1328 DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
1329 DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
1330 DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
1331 DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
1332 DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
1333 DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
1334 DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)
1335 
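/*
 * Long shifts (VSHLL): the input elements are bytes or halfwords and
 * the results are twice that width, so only two element sizes exist.
 */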
1336 #define DO_VSHLL(INSN, FN)                                      \
1337     static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
1338     {                                                           \
1339         static MVEGenTwoOpShiftFn * const fns[] = {             \
1340             gen_helper_mve_##FN##b,                             \
1341             gen_helper_mve_##FN##h,                             \
1342         };                                                      \
1343         return do_2shift(s, a, fns[a->size], false);            \
1344     }
1345 
1346 DO_VSHLL(VSHLL_BS, vshllbs)
1347 DO_VSHLL(VSHLL_BU, vshllbu)
1348 DO_VSHLL(VSHLL_TS, vshllts)
1349 DO_VSHLL(VSHLL_TU, vshlltu)
1350 
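/*
 * Narrowing shifts: these shift double-width input elements right and
 * narrow the results to bytes or halfwords, so again only two element
 * sizes are valid.
 */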
1351 #define DO_2SHIFT_N(INSN, FN)                                   \
1352     static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
1353     {                                                           \
1354         static MVEGenTwoOpShiftFn * const fns[] = {             \
1355             gen_helper_mve_##FN##b,                             \
1356             gen_helper_mve_##FN##h,                             \
1357         };                                                      \
1358         return do_2shift(s, a, fns[a->size], false);            \
1359     }
1360 
1361 DO_2SHIFT_N(VSHRNB, vshrnb)
1362 DO_2SHIFT_N(VSHRNT, vshrnt)
1363 DO_2SHIFT_N(VRSHRNB, vrshrnb)
1364 DO_2SHIFT_N(VRSHRNT, vrshrnt)
1365 DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
1366 DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
1367 DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
1368 DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
1369 DO_2SHIFT_N(VQSHRUNB, vqshrunb)
1370 DO_2SHIFT_N(VQSHRUNT, vqshrunt)
1371 DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
1372 DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
1373 DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
1374 DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
1375 DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
1376 DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)
1377 
1378 static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
1379 {
1380     /*
1381      * Whole Vector Left Shift with Carry. The carry is taken
1382      * from a general purpose register and written back there.
1383      * An imm of 0 means "shift by 32".
1384      */
1385     TCGv_ptr qd;
1386     TCGv_i32 rdm;
1387 
1388     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
1389         return false;
1390     }
1391     if (a->rdm == 13 || a->rdm == 15) {
1392         /* CONSTRAINED UNPREDICTABLE: we UNDEF */
1393         return false;
1394     }
1395     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1396         return true;
1397     }
1398 
1399     qd = mve_qreg_ptr(a->qd);
1400     rdm = load_reg(s, a->rdm);
1401     gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
1402     store_reg(s, a->rdm, rdm);
1403     tcg_temp_free_ptr(qd);
1404     mve_update_eci(s);
1405     return true;
1406 }
1407 
1408 static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
1409 {
1410     TCGv_ptr qd;
1411     TCGv_i32 rn;
1412 
1413     /*
1414      * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
1415      * This fills the vector with elements of successively increasing
1416      * or decreasing values, starting from Rn.
1417      */
1418     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
1419         return false;
1420     }
1421     if (a->size == MO_64) {
1422         /* size 0b11 is another encoding */
1423         return false;
1424     }
1425     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1426         return true;
1427     }
1428 
1429     qd = mve_qreg_ptr(a->qd);
1430     rn = load_reg(s, a->rn);
1431     fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
1432     store_reg(s, a->rn, rn);
1433     tcg_temp_free_ptr(qd);
1434     mve_update_eci(s);
1435     return true;
1436 }
1437 
1438 static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
1439 {
1440     TCGv_ptr qd;
1441     TCGv_i32 rn, rm;
1442 
1443     /*
1444      * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
1445      * This fills the vector with elements of successively increasing
1446      * or decreasing values, starting from Rn. Rm specifies a point where
1447      * the count wraps back around to 0. The updated offset is written back
1448      * to Rn.
1449      */
1450     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
1451         return false;
1452     }
1453     if (!fn || a->rm == 13 || a->rm == 15) {
1454         /*
1455          * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
1456          * Rm == 15 is VIDUP, VDDUP.
1457          */
1458         return false;
1459     }
1460     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1461         return true;
1462     }
1463 
1464     qd = mve_qreg_ptr(a->qd);
1465     rn = load_reg(s, a->rn);
1466     rm = load_reg(s, a->rm);
1467     fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
1468     store_reg(s, a->rn, rn);
1469     tcg_temp_free_ptr(qd);
1470     tcg_temp_free_i32(rm);
1471     mve_update_eci(s);
1472     return true;
1473 }
1474 
1475 static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
1476 {
1477     static MVEGenVIDUPFn * const fns[] = {
1478         gen_helper_mve_vidupb,
1479         gen_helper_mve_viduph,
1480         gen_helper_mve_vidupw,
1481         NULL,
1482     };
1483     return do_vidup(s, a, fns[a->size]);
1484 }
1485 
1486 static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
1487 {
1488     static MVEGenVIDUPFn * const fns[] = {
1489         gen_helper_mve_vidupb,
1490         gen_helper_mve_viduph,
1491         gen_helper_mve_vidupw,
1492         NULL,
1493     };
1494     /* VDDUP is just like VIDUP but with a negative immediate */
1495     a->imm = -a->imm;
1496     return do_vidup(s, a, fns[a->size]);
1497 }
1498 
1499 static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
1500 {
1501     static MVEGenVIWDUPFn * const fns[] = {
1502         gen_helper_mve_viwdupb,
1503         gen_helper_mve_viwduph,
1504         gen_helper_mve_viwdupw,
1505         NULL,
1506     };
1507     return do_viwdup(s, a, fns[a->size]);
1508 }
1509 
1510 static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
1511 {
1512     static MVEGenVIWDUPFn * const fns[] = {
1513         gen_helper_mve_vdwdupb,
1514         gen_helper_mve_vdwduph,
1515         gen_helper_mve_vdwdupw,
1516         NULL,
1517     };
1518     return do_viwdup(s, a, fns[a->size]);
1519 }
1520 
1521 static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
1522 {
1523     TCGv_ptr qn, qm;
1524 
1525     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
1526         !fn) {
1527         return false;
1528     }
1529     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1530         return true;
1531     }
1532 
1533     qn = mve_qreg_ptr(a->qn);
1534     qm = mve_qreg_ptr(a->qm);
1535     fn(cpu_env, qn, qm);
1536     tcg_temp_free_ptr(qn);
1537     tcg_temp_free_ptr(qm);
1538     if (a->mask) {
1539         /* VPT */
1540         gen_vpst(s, a->mask);
1541     }
1542     mve_update_eci(s);
1543     return true;
1544 }
1545 
1546 static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
1547                            MVEGenScalarCmpFn *fn)
1548 {
1549     TCGv_ptr qn;
1550     TCGv_i32 rm;
1551 
1552     if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
1553         return false;
1554     }
1555     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1556         return true;
1557     }
1558 
1559     qn = mve_qreg_ptr(a->qn);
1560     if (a->rm == 15) {
1561         /* Encoding Rm=0b1111 means "constant zero" */
1562         rm = tcg_constant_i32(0);
1563     } else {
1564         rm = load_reg(s, a->rm);
1565     }
1566     fn(cpu_env, qn, rm);
1567     tcg_temp_free_ptr(qn);
1568     tcg_temp_free_i32(rm);
1569     if (a->mask) {
1570         /* VPT */
1571         gen_vpst(s, a->mask);
1572     }
1573     mve_update_eci(s);
1574     return true;
1575 }
1576 
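/*
 * Each integer comparison below expands to two trans functions: the
 * vector-vs-vector form and the vector-vs-scalar form (where Rm may be
 * a GPR or the Rm=15 "constant zero" encoding).
 */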
1577 #define DO_VCMP(INSN, FN)                                       \
1578     static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
1579     {                                                           \
1580         static MVEGenCmpFn * const fns[] = {                    \
1581             gen_helper_mve_##FN##b,                             \
1582             gen_helper_mve_##FN##h,                             \
1583             gen_helper_mve_##FN##w,                             \
1584             NULL,                                               \
1585         };                                                      \
1586         return do_vcmp(s, a, fns[a->size]);                     \
1587     }                                                           \
1588     static bool trans_##INSN##_scalar(DisasContext *s,          \
1589                                       arg_vcmp_scalar *a)       \
1590     {                                                           \
1591         static MVEGenScalarCmpFn * const fns[] = {              \
1592             gen_helper_mve_##FN##_scalarb,                      \
1593             gen_helper_mve_##FN##_scalarh,                      \
1594             gen_helper_mve_##FN##_scalarw,                      \
1595             NULL,                                               \
1596         };                                                      \
1597         return do_vcmp_scalar(s, a, fns[a->size]);              \
1598     }
1599 
1600 DO_VCMP(VCMPEQ, vcmpeq)
1601 DO_VCMP(VCMPNE, vcmpne)
1602 DO_VCMP(VCMPCS, vcmpcs)
1603 DO_VCMP(VCMPHI, vcmphi)
1604 DO_VCMP(VCMPGE, vcmpge)
1605 DO_VCMP(VCMPLT, vcmplt)
1606 DO_VCMP(VCMPGT, vcmpgt)
1607 DO_VCMP(VCMPLE, vcmple)
1608 
1609 static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
1610 {
1611     /*
1612      * MIN/MAX operations across a vector: compute the min or
1613      * max of the initial value in a general purpose register
1614      * and all the elements in the vector, and store it back
1615      * into the general purpose register.
1616      */
1617     TCGv_ptr qm;
1618     TCGv_i32 rda;
1619 
1620     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
1621         !fn || a->rda == 13 || a->rda == 15) {
1622         /* Rda cases are UNPREDICTABLE */
1623         return false;
1624     }
1625     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1626         return true;
1627     }
1628 
1629     qm = mve_qreg_ptr(a->qm);
1630     rda = load_reg(s, a->rda);
1631     fn(rda, cpu_env, qm, rda);
1632     store_reg(s, a->rda, rda);
1633     tcg_temp_free_ptr(qm);
1634     mve_update_eci(s);
1635     return true;
1636 }
1637 
1638 #define DO_VMAXV(INSN, FN)                                      \
1639     static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
1640     {                                                           \
1641         static MVEGenVADDVFn * const fns[] = {                  \
1642             gen_helper_mve_##FN##b,                             \
1643             gen_helper_mve_##FN##h,                             \
1644             gen_helper_mve_##FN##w,                             \
1645             NULL,                                               \
1646         };                                                      \
1647         return do_vmaxv(s, a, fns[a->size]);                    \
1648     }
1649 
1650 DO_VMAXV(VMAXV_S, vmaxvs)
1651 DO_VMAXV(VMAXV_U, vmaxvu)
1652 DO_VMAXV(VMAXAV, vmaxav)
1653 DO_VMAXV(VMINV_S, vminvs)
1654 DO_VMAXV(VMINV_U, vminvu)
1655 DO_VMAXV(VMINAV, vminav)
1656 
1657 static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
1658 {
1659     /* Absolute difference accumulated across vector */
1660     TCGv_ptr qn, qm;
1661     TCGv_i32 rda;
1662 
1663     if (!dc_isar_feature(aa32_mve, s) ||
1664         !mve_check_qreg_bank(s, a->qm | a->qn) ||
1665         !fn || a->rda == 13 || a->rda == 15) {
1666         /* Rda cases are UNPREDICTABLE */
1667         return false;
1668     }
1669     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1670         return true;
1671     }
1672 
1673     qm = mve_qreg_ptr(a->qm);
1674     qn = mve_qreg_ptr(a->qn);
1675     rda = load_reg(s, a->rda);
1676     fn(rda, cpu_env, qn, qm, rda);
1677     store_reg(s, a->rda, rda);
1678     tcg_temp_free_ptr(qm);
1679     tcg_temp_free_ptr(qn);
1680     mve_update_eci(s);
1681     return true;
1682 }
1683 
1684 #define DO_VABAV(INSN, FN)                                      \
1685     static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
1686     {                                                           \
1687         static MVEGenVABAVFn * const fns[] = {                  \
1688             gen_helper_mve_##FN##b,                             \
1689             gen_helper_mve_##FN##h,                             \
1690             gen_helper_mve_##FN##w,                             \
1691             NULL,                                               \
1692         };                                                      \
1693         return do_vabav(s, a, fns[a->size]);                    \
1694     }
1695 
1696 DO_VABAV(VABAV_S, vabavs)
1697 DO_VABAV(VABAV_U, vabavu)
1698 
1699 static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
1700 {
1701     /*
1702      * VMOV two 32-bit vector lanes to two general-purpose registers.
1703      * This insn is not predicated but it is subject to beat-wise
1704      * execution if it is not in an IT block. For us this means
1705      * only that if PSR.ECI says we should not be executing the beat
1706      * corresponding to the lane of the vector register being accessed
1707      * then we should skip performing the move, and that we need to do
1708      * the usual check for bad ECI state and advance of ECI state.
1709      * (If PSR.ECI is non-zero then we cannot be in an IT block.)
1710      */
1711     TCGv_i32 tmp;
1712     int vd;
1713 
1714     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
1715         a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
1716         a->rt == a->rt2) {
1717         /* Rt/Rt2 cases are UNPREDICTABLE */
1718         return false;
1719     }
1720     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1721         return true;
1722     }
1723 
1724     /* Convert Qreg index to Dreg for read_neon_element32() etc */
1725     vd = a->qd * 2;
1726 
1727     if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
1728         tmp = tcg_temp_new_i32();
1729         read_neon_element32(tmp, vd, a->idx, MO_32);
1730         store_reg(s, a->rt, tmp);
1731     }
1732     if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
1733         tmp = tcg_temp_new_i32();
1734         read_neon_element32(tmp, vd + 1, a->idx, MO_32);
1735         store_reg(s, a->rt2, tmp);
1736     }
1737 
1738     mve_update_and_store_eci(s);
1739     return true;
1740 }
1741 
1742 static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
1743 {
1744     /*
1745      * VMOV two general-purpose registers to two 32-bit vector lanes.
1746      * This insn is not predicated but it is subject to beat-wise
1747      * execution if it is not in an IT block. For us this means
1748      * only that if PSR.ECI says we should not be executing the beat
1749      * corresponding to the lane of the vector register being accessed
1750      * then we should skip performing the move, and that we need to do
1751      * the usual check for bad ECI state and advance of ECI state.
1752      * (If PSR.ECI is non-zero then we cannot be in an IT block.)
1753      */
1754     TCGv_i32 tmp;
1755     int vd;
1756 
1757     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
1758         a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
1759         /* Rt/Rt2 cases are UNPREDICTABLE */
1760         return false;
1761     }
1762     if (!mve_eci_check(s) || !vfp_access_check(s)) {
1763         return true;
1764     }
1765 
1766     /* Convert Qreg idx to Dreg for read_neon_element32() etc */
1767     vd = a->qd * 2;
1768 
1769     if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
1770         tmp = load_reg(s, a->rt);
1771         write_neon_element32(tmp, vd, a->idx, MO_32);
1772         tcg_temp_free_i32(tmp);
1773     }
1774     if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
1775         tmp = load_reg(s, a->rt2);
1776         write_neon_element32(tmp, vd + 1, a->idx, MO_32);
1777         tcg_temp_free_i32(tmp);
1778     }
1779 
1780     mve_update_and_store_eci(s);
1781     return true;
1782 }
1783