/*
 *  ARM translation: M-profile MVE instructions
 *
 *  Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}
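
/*
 * (The AArch32 Q registers share storage in CPUARMState with the
 * low 128 bits of the SVE Z registers, which is why the offset is
 * taken from vfp.zregs[]; compare aa32_vfp_qreg().)
 */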

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

static void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}
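
/*
 * As a worked example of the update above: ECI_A0A1A2B0 means that
 * beats A0, A1 and A2 of this insn and beat B0 of the *next* insn
 * have already been executed, so this insn performs only its final
 * beat and the next insn then resumes with its beat 0 already done,
 * i.e. with ECI_A0. In every other non-NONE case all the remaining
 * beats belong to this insn, so the new state is ECI_NONE.
 */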

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}
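
/*
 * A sketch of the three addressing forms do_ldst() handles, assuming
 * a halfword access (msize == MO_16) with a->imm == 2, so that
 * offset == 2 << 1 == 4 bytes:
 *   [Rn, #4]   a->p == 1, a->w == 0: access Rn + 4, Rn unchanged
 *   [Rn, #4]!  a->p == 1, a->w == 1: access Rn + 4, then Rn += 4
 *   [Rn], #4   a->p == 0, a->w == 1: access Rn, then Rn += 4
 * With a->a clear the offset is negated before use.
 */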

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
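
/*
 * For example, DO_VLDST_WIDE_NARROW(VLDSTB_H, ...) expands to a
 * trans_VLDSTB_H() whose ldstfns[a->u][a->l] table dispatches:
 *   l == 0          -> gen_helper_mve_vstrb_h  (narrowing store)
 *   l == 1, u == 0  -> gen_helper_mve_vldrb_sh (sign-extending load)
 *   l == 1, u == 1  -> gen_helper_mve_vldrb_uh (zero-extending load)
 * (l == 0, u == 1 is NULL and so UNDEFs via do_ldst().)
 */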

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
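    /*
     * Replicate the low 8 or 16 bits of Rt across the 32-bit word for
     * MO_8/MO_16 (e.g. 0x12 becomes 0x12121212), so the helper only
     * ever has to copy full words into the vector.
     */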
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)
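
/*
 * In the B ("bottom") forms the narrowed values are written to the
 * even-numbered halves of each destination element, and in the
 * T ("top") forms to the odd-numbered halves; the other halves of
 * Qd are left unchanged.
 */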

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next.  The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution.  Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}
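
/*
 * Taken together these give wide arithmetic: in effect a single
 * VADCI.I32 qd, qn, qm adds qn and qm as 128-bit integers (carry-in
 * fixed at 0, final carry-out left in FPSCR.C), and a following
 * VADC.I32 consumes that carry to extend the addition beyond
 * 128 bits.
 */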

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
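        /* Build rda = RdaHi:RdaLo, with RdaLo in the low 32 bits */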
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}
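
/*
 * For example, a VPST reaching gen_vpst() with ECI_NONE and mask
 * 0b0100 deposits 0b0100 into both VPR.MASK01 and VPR.MASK23 in a
 * single deposit, while the same VPST resuming at ECI_A0A1 (beat 1
 * already executed) leaves MASK01 alone and writes only MASK23.
 */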

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}
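
/*
 * As an illustration of the decode above: the odd cmode values below
 * 12 are the VORR/VBIC immediate forms (e.g. cmode 0b0011 places the
 * 8-bit immediate in byte 1 of each word), while the remaining
 * patterns are VMOV/VMVN immediates, with cmode 0b1111 op 1 the one
 * unallocated combination.
 */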

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                         \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)
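
/*
 * For example, a VSHRI_S at size MO_16 with shift 3 reaches
 * do_2shift() with fn == gen_helper_mve_vshli_sh and
 * negateshift == true, so the helper is called with a shift count
 * of -3 and performs an arithmetic right shift by 3.
 */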

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_scalar(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}
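
/*
 * In other words, VSHLC treats Qd as a single 128-bit value shifted
 * left by imm bits: the imm bits shifted out of the top end up in
 * Rdm, and the low imm bits of the old Rdm are shifted in at the
 * bottom, which allows shift chains across multiple vectors.
 */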

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}
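
/*
 * For example (illustrative values): VIDUP at size MO_16 with
 * Rn == 10 and imm == 2 writes 10, 12, 14, ..., 24 into the eight
 * halfword lanes and writes 26 (10 + 8 * 2) back to Rn.
 */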

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is VIDUP, VDDUP.
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}
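
/*
 * A worked example of the wrapping behaviour (illustrative values):
 * VIWDUP at size MO_8 with Rn == 6, Rm == 8 and imm == 2 produces
 * the lane values 6, 0, 2, 4, 6, 0, ... because the offset wraps to
 * 0 each time it reaches the Rm limit of 8.
 */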

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)