/*
 *  ARM translation: M-profile MVE instructions
 *
 *  Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

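/*
 * Decode helper used by the generated decoder below: the immediate
 * field of VIDUP/VDDUP encodes a step size of 1, 2, 4 or 8, i.e.
 * 1 << imm.
 */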
static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

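/*
 * Prototypes for the generated helpers. Helpers that produce a scalar
 * result (e.g. the VADDV and dual-accumulate families) return it via
 * their first argument; the TCGv_ptr arguments are cpu_env plus
 * pointers into the Q register file.
 */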
typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

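/* Callers pass the OR of all the Q register numbers the insn uses. */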
static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

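/*
 * PSR.ECI records which beats of a partially-executed pair of beatwise
 * insns have already run: ECI_A0A1, for instance, means beats A0 and A1
 * of the current insn are done and execution resumes at beat A2. The
 * only value that leaves work pending once this insn completes is
 * ECI_A0A1A2B0, where beat B0 of the *next* insn has also executed, so
 * it becomes ECI_A0 for that insn; every other value decays to ECI_NONE.
 */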
static void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

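/*
 * Generic load/store handler. The decode fields follow the usual A32
 * addressing-mode conventions: a->a is the add/subtract bit, a->p
 * selects pre-indexing, a->w requests writeback, and the immediate
 * offset is scaled by the memory element size (msize).
 */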
static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

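/*
 * Widening loads and narrowing stores: for example VLDSTB_H loads
 * bytes and widens them (signed or unsigned) to halfword elements,
 * while its store form narrows halfwords back to bytes; MSIZE is the
 * element size in memory.
 */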
DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

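    /*
     * tcg_gen_dup_i32() replicates the byte or halfword across the
     * 32-bit value first, so the helper only has to store whole words.
     */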
    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next.  The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution.  Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

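/*
 * As an example of the immediate expansion done by asimd_imm_const()
 * (which follows the usual AdvSIMD rules), cmode == 14 with op == 0
 * replicates the 8-bit immediate into every byte of the 64-bit
 * constant, so a VMOV.I8 of 0xab fills the vector with 0xab.
 */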
static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                         \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_scalar(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

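/*
 * The widening shifts (and the narrowing shifts below) only come in
 * two element-size variants, so the decoder should never produce
 * a->size > 1 for them and a two-entry fns[] array suffices.
 */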
#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
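    /*
     * In effect Rdm:Qd forms one long shift register: the bits shifted
     * out of the top of Qd are returned in Rdm, and the old low bits
     * of Rdm are shifted in at the bottom of Qd.
     */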
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
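    /*
     * For example, VIDUP.u8 with Rn == 0 and an immediate step of 1
     * writes 0, 1, 2, ... 15 to the byte elements of Qd and leaves
     * Rn == 16, ready for the next iteration.
     */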
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is UNPREDICTABLE (we choose to UNDEF).
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

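/*
 * VCMP and VPT share an encoding: a nonzero mask field makes the insn
 * a VPT, which sets up the VPR mask fields via gen_vpst() in addition
 * to updating VPR.P0 with the comparison result.
 */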
static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)