/*
 *  ARM translation: M-profile MVE instructions
 *
 *  Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

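/*
 * Decode-time transform for the VIDUP/VDDUP immediate: the 2-bit
 * field encodes a step size of 1, 2, 4 or 8, i.e. imm = 1 << x.
 */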
static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

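/*
 * Function pointer types matching the signatures of the generated MVE
 * helpers. By convention the CPU env pointer comes first, or second
 * (after the value returned in a general-purpose register or register
 * pair) for helpers which produce a scalar result.
 */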
typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

static void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
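        /*
         * ECI_A0A1A2B0 means that beat 0 of the *following* insn has
         * also already executed, so once this insn completes the new
         * state is ECI_A0; every other valid state refers only to
         * beats of this insn and so collapses to ECI_NONE.
         */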
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
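
/*
 * As an example, DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh,
 * vstrb_h, MO_8) generates trans_VLDSTB_H(), handling VLDRB.S16 and
 * VLDRB.U16 (widening loads of bytes into halfword elements) and the
 * matching narrowing store VSTRB.16. MSIZE is the element size in
 * memory, which do_ldst() uses to scale the immediate offset.
 */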

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
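    /*
     * Replicate the 8- or 16-bit scalar across the full 32 bits of rt
     * (a no-op for word size) so the helper can fill the vector by
     * copying 32-bit chunks.
     */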
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }
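
/*
 * For instance, DO_1OP(VCLZ, vclz) expands to a trans_VCLZ() which
 * dispatches on the element size to gen_helper_mve_vclzb/h/w; the NULL
 * table entry makes do_1op() reject the reserved size 0b11.
 */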

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

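/* VPSEL selects between the Qn and Qm inputs under control of VPR.P0 */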
DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next.  The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution.  Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
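/*
 * A concrete example: VADC.I32 Qd, Qn, Qm with FPSCR.C == 1 computes
 * Qd[0] = Qn[0] + Qm[0] + 1 in beat 0, uses that add's carry-out as
 * the carry-in for beat 1, and so on, leaving the final carry-out in
 * FPSCR.C.
 */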
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
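        /* Pack the 64-bit accumulator from the RdaHi:RdaLo pair */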
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

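    /*
     * Odd cmode values below 12 are the "bitwise immediate" group in
     * the Neon-style modified-immediate encoding: op distinguishes
     * VBIC (op == 1) from VORR (op == 0).
     */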
    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                         \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
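/* (e.g. VSHRI_S at size MO_16 with imm 3 calls vshli_s's halfword helper with shift -3) */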
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_scalar(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 and Rm == 15
         * are UNPREDICTABLE (we choose to UNDEF)
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)