/*
 *  ARM translation: M-profile MVE instructions
 *
 *  Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}
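
/*
 * Editor's note (illustrative, not part of the original source): the
 * "imm" field of VIDUP/VDDUP encodes log2 of the lane step, so x = 0,
 * 1, 2, 3 decodes to steps of 1, 2, 4, 8; e.g. vidup_imm(s, 2) == 4.
 */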

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}
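
/*
 * Editor's note (illustrative): callers pass the bitwise OR of all the
 * Q register numbers involved, so one comparison covers every operand:
 * e.g. Qd = 5 and Qm = 6 give qmask = 5 | 6 = 7, which is in range,
 * while any register >= 8 sets bit 3 and makes qmask >= 8.
 */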

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

static void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}
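
/*
 * Editor's note (illustrative): ECI_A0A1A2B0 means beats A0..A2 of this
 * insn plus beat 0 of the *next* insn have already been executed, so
 * once this insn is handled the next one has its own beat 0 complete
 * and s->eci becomes ECI_A0; every other valid state drains to ECI_NONE.
 */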

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}
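
/*
 * Editor's note (illustrative): the byte offset scales the immediate by
 * the memory element size; e.g. a hypothetical "VLDRH.U16 Q0, [Rn, #4]"
 * has msize = MO_16 and a->imm = 2, giving offset = 2 << 1 = 4, with
 * a->p/a->w selecting the usual pre-/post-indexed and writeback forms.
 */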

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
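
/*
 * Editor's note (illustrative): these are the widening loads and
 * narrowing stores, where the memory size is narrower than the element
 * size: e.g. VLDSTB_H covers VLDRB.S16/VLDRB.U16, which load bytes and
 * sign- or zero-extend them into halfword lanes, and VSTRB.16, which
 * stores the low byte of each halfword lane; all use MSIZE = MO_8.
 */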

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}
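
/*
 * Editor's note (illustrative): the polynomial multiplies are carry-less
 * multiplies over GF(2), using XOR in place of addition; e.g. for
 * VMULL.P8, input lanes of 0x03 * 0x03 produce 0x0005 rather than 0x0009.
 */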

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next.  The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution.  Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}
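
/*
 * Worked example (editorial, illustrative): with 32-bit lanes
 * n = {0xffffffff, 0, 0, 0}, m = {1, 0, 0, 0} and carry-in 0, VADC
 * yields d = {0, 1, 0, 0} with FPSCR.C = 0: beat 0 overflows, its
 * carry-out feeds beat 1, and the carry-out of beat 3 is what finally
 * lands in FPSCR.C.
 */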

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}
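
/*
 * Editor's note (illustrative): the 64-bit accumulator lives in the GPR
 * pair RdaHi:RdaLo; e.g. RdaLo = 0xffffffff, RdaHi = 0x1 enters the
 * helper as rda = 0x1ffffffff, and the result is split back the same
 * way via the extrl/extrh pair above.
 */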

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}
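
/*
 * Editor's note (illustrative, assuming the v7M VPR layout with MASK01
 * at bits [19:16] and MASK23 at bits [23:20]): because the fields are
 * adjacent, the ECI_NONE/ECI_A0 case can write both with a single
 * deposit of (mask | (mask << 4)) at bit 16, length 8, rather than two
 * separate deposits.
 */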

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}
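
/*
 * Editor's note (illustrative): e.g. a hypothetical "VSHR.S16 Q0, Q1, #3"
 * reaches the vshli_s helper with a shift count of -3, since the
 * left-shift helpers interpret a negative count as a right shift.
 */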

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                         \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_scalar(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}
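
/*
 * Editor's note (illustrative, from the VSHLC description above): the
 * whole 128-bit vector shifts left by imm (1..32, with 0 encoding 32);
 * the bits shifted out of the top end up in Rdm, and the bottom bits
 * shifted in are taken from the low bits of the incoming Rdm value.
 */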

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}
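
/*
 * Worked example (editorial, illustrative): a hypothetical
 * "VIDUP.U32 Q0, Rn, #1" with Rn = 10 fills the four word lanes with
 * {10, 11, 12, 13} and writes the updated offset, 14, back to Rn.
 */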

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 and Rm == 15
         * are UNPREDICTABLE.
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)