/*
 *  ARM translation: M-profile MVE instructions
 *
 *  Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

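/*
 * Decode helper: the 2-bit imm field of VIDUP/VDDUP/VIWDUP/VDWDUP
 * encodes the step size 1, 2, 4 or 8 as 1 << imm.
 */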
static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
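    /* qmask is the OR of the Qreg numbers involved: it is < 8 iff all are */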
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

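/*
 * Example: an insn resumed with ECI_A0A1A2B0 has already done its own
 * beats A0, A1 and A2 plus beat B0 of the following insn, so on
 * completion the state for that next insn is ECI_A0; in all other
 * cases the next insn starts with ECI_NONE.
 */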
void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

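    /* The immediate offset is in units of the memory element size */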
    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)

static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn case is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, addr);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_i32(addr);
    mve_update_eci(s);
    return true;
}

/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * sign-extended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
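/*
 * The fns arrays below are indexed [os][msize][size]: for example
 * fns[0][MO_8][MO_16] is vldrb_sg_sh, an unscaled gather of bytes
 * sign-extended to halfword elements.
 */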
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
            { NULL, NULL,           F(vldrh_sg_sw), NULL },
            { NULL, NULL,           NULL,           NULL },
            { NULL, NULL,           NULL,           NULL }
        }, {
            { NULL, NULL,              NULL,              NULL },
            { NULL, NULL,              F(vldrh_sg_os_sw), NULL },
            { NULL, NULL,              NULL,              NULL },
            { NULL, NULL,              NULL,              NULL }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
            { NULL,           F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
            { NULL,           NULL,           F(vldrw_sg_uw), NULL },
            { NULL,           NULL,           NULL,           F(vldrd_sg_ud) }
        }, {
            { NULL, NULL,              NULL,              NULL },
            { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
            { NULL, NULL,              F(vldrw_sg_os_uw), NULL },
            { NULL, NULL,              NULL,              F(vldrd_sg_os_ud) }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
            { NULL,           F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
            { NULL,           NULL,           F(vstrw_sg_uw), NULL },
            { NULL,           NULL,           NULL,           F(vstrd_sg_ud) }
        }, {
            { NULL, NULL,              NULL,              NULL },
            { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
            { NULL, NULL,              F(vstrw_sg_os_uw), NULL },
            { NULL, NULL,              NULL,              F(vstrd_sg_os_ud) }
        }
    };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

#undef F

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
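    /* Replicate the byte or halfword across the 32-bit value (no-op for words) */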
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next.  The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution.  Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.C
 * value, not the fixed constant input.
 */
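/*
 * For example, a full (unpredicated) VADCI.I32 Qd, Qn, Qm computes
 *   Qd[0] = Qn[0] + Qm[0] + 0
 *   Qd[e] = Qn[e] + Qm[e] + carry-out of element e-1, for e = 1..3
 * and writes the final carry-out to FPSCR.C.
 */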
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have to call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    gen_helper_mve_vpnot(cpu_env);
    mve_update_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

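    /*
     * Odd cmode values below 12 (1, 3, 5, 7, 9, 11) are the per-lane
     * immediate OR/BIC forms; op distinguishes VORR from VBIC.
     */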
    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                         \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_scalar(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

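/*
 * The shift-and-widen and shift-and-narrow insns exist only for byte
 * and halfword element sizes, so the arrays in these macros have just
 * two entries; the decode never produces a->size > 1 for them.
 */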
#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
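     * For example, a VIDUP.U16 with Rn=10 and imm=2 writes 10, 12, 14, ...
     * to successive lanes and then writes the incremented value
     * (10 + 2 * 8 = 26 for eight halfword lanes) back to Rn.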
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is the related VIDUP, VDDUP encoding.
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
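    /* A nonzero mask means this is a VPT insn: also set the VPR mask fields */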
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)

static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two 32-bit vector lanes to two general-purpose registers.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
        a->rt == a->rt2) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd, a->idx, MO_32);
        store_reg(s, a->rt, tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd + 1, a->idx, MO_32);
        store_reg(s, a->rt2, tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two general-purpose registers to two 32-bit vector lanes.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, vd, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt2);
        write_neon_element32(tmp, vd + 1, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}