/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}
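
/*
 * Example: for byte elements, tsz = 0b0001, so x = tsz:imm3 lies in
 * [8, 15] and 31 - clz32(x >> 3) = 0.  A tsz of 0 (x < 8) yields
 * 31 - clz32(0) = -1, the unallocated marker.
 */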

static int tszimm_shr(DisasContext *s, int x)
{
    /*
     * We won't use the tszimm_shr() value if tszimm_esz() returns -1 (the
     * trans function will check for esz < 0), so we can return any
     * value we like from here in that case as long as we avoid UB.
     */
    int esz = tszimm_esz(s, x);
    if (esz < 0) {
        return esz;
    }
    return (16 << esz) - x;
}

/* See e.g. LSL (immediate, predicated).  */
static int tszimm_shl(DisasContext *s, int x)
{
    /* As with tszimm_shr(), value will be unused if esz < 0 */
    int esz = tszimm_esz(s, x);
    if (esz < 0) {
        return esz;
    }
    return x - (8 << esz);
}
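
/*
 * Worked example for esz = 0 (tsz = 0b0001, x in [8, 15]):
 * right shifts are (16 << 0) - x, i.e. 8 down to 1, and left shifts
 * are x - (8 << 0), i.e. 0 up to 7, matching the architectural ranges.
 */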

/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}
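
/*
 * E.g. x = 0x1ff: the SH bit is set, so the signed form yields
 * (int8_t)0xff << 8 = -256 and the unsigned form 0xff << 8 = 0xff00.
 */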

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    static const uint8_t dtype[5] = { 0, 5, 10, 15, 18 };
    return dtype[msz];
}
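
/*
 * E.g. msz = MO_32 maps to dtype 10, the unsigned LD1W encoding; the
 * first four entries are the dtypes whose memory and register sizes
 * match, while 18 lies outside the 4-bit field and is used internally
 * for the quadword (128-bit) forms.
 */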

/*
 * Include the generated decoder.
 */

#include "decode-sve.c.inc"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Invoke an out-of-line helper on 2 Zregs. */
static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                             int rd, int rn, int data,
                             ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_ah_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                                    arg_rr_esz *a, int data)
{
    return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
                            select_ah_fpst(s, a->esz));
}

/* Invoke an out-of-line helper on 3 Zregs. */
static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}

/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int rm,
                              int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rrr_esz *a, int data)
{
    return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
                             a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
}

static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                     arg_rrr_esz *a, int data)
{
    return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
                             select_ah_fpst(s, a->esz));
}

/* Invoke an out-of-line helper on 4 Zregs. */
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrrr_esz *a, int data)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}

static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrxr_esz *a)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
}

/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                              int rd, int rn, int rm, int ra,
                              int data, TCGv_ptr ptr)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           ptr, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int ra,
                               int data, ARMFPStatusFlavour flavour)
{
    TCGv_ptr status = fpstatus_ptr(flavour);
    bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
    return ret;
}

static bool gen_gvec_env_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                              int rd, int rn, int rm, int ra,
                              int data)
{
    return gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, tcg_env);
}

static bool gen_gvec_env_arg_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                                  arg_rrrr_esz *a, int data)
{
    return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}

static bool gen_gvec_env_arg_zzxz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                                  arg_rrxr_esz *a)
{
    return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
}

/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
                                int rd, int rn, int rm, int ra, int pg,
                                int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rpr_esz *a, int data)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
}

static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
                                  arg_rpri_esz *a)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}

static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int pg, int data,
                              ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rpr_esz *a, int data,
                                  ARMFPStatusFlavour flavour)
{
    return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rprr_esz *a, int data)
{
    return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate, plus fpst. */
static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int pg, int data,
                               ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                                   arg_rprr_esz *a)
{
    return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
                              a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
}

/* Invoke a vector expander on two Zregs and an immediate.  */
static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                            int esz, int rd, int rn, uint64_t imm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), imm, vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                                arg_rri_esz *a)
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
}

/* Invoke a vector expander on three Zregs.  */
static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
                                arg_rrr_esz *a)
{
    return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
}

/* Invoke a vector expander on four Zregs.  */
static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                                 arg_rrrr_esz *a)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn),
                vec_full_reg_offset(s, a->rm),
                vec_full_reg_offset(s, a->ra), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
                         vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on three Pregs.  */
static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(MO_64, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
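
/*
 * The helper result is packed accordingly: N in bit 31 (the sign bit of
 * NF), !Z in bit 1 (QEMU's ZF is "zero means Z set"), C in bit 0, and
 * PredTest never sets V.
 */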

/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, tcg_env, dofs);
    tcg_gen_addi_ptr(gptr, tcg_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));

    do_pred_flags(t);
}

/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[5] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull,
    0x0001000100010001ull,
};
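
/*
 * A predicate has one bit per byte of the vector, so wider elements use
 * every 2nd, 4th, 8th or 16th bit: e.g. for MO_32 the mask
 * 0x1111111111111111 selects bits 0, 4, 8, ...
 */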

static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);
    return true;
}

/*
 *** SVE Logical - Unpredicated Group
 */

TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
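
/*
 * XAR: Zd = ROR(Zn ^ Zm, #imm), destructive on Zn.  The rotate amount
 * reuses the tszimm right-shift encoding, hence the esz < 0 check below.
 */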
static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}

TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_eor3, a)
TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_bcax, a)

static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)

static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
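    /*
     * Z[dn] = (~n & k) | (m & ~k)
     * i.e. BSL with the first operand inverted; k = Z[a] is the select mask.
     */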
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    tcg_gen_not_vec(vece, n, n);
    tcg_gen_bitsel_vec(vece, d, k, n, m);
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)

static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       =         | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I64, 0)) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    tcg_gen_not_vec(vece, m, m);
    tcg_gen_bitsel_vec(vece, d, k, n, m);
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)

static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
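    /* Z[dn] = ~((n & k) | (m & ~k)): BSL with the result inverted. */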
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}

#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = {               \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h,           \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d,           \
    };                                                                    \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz,                         \
               name##_zpzz_fns[a->esz], a, 0)

DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)

DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)

DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)

DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)

DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)

static gen_helper_gvec_4 * const sdiv_fns[4] = {
    NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
};
TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const udiv_fns[4] = {
    NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
};
TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)

TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = {              \
        gen_helper_##name##_b, gen_helper_##name##_h,               \
        gen_helper_##name##_s, gen_helper_##name##_d,               \
    };                                                              \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)
DO_ZPZ(ORQV, aa64_sme2p1_or_sve2p1, sve2p1_orqv)
DO_ZPZ(EORQV, aa64_sme2p1_or_sve2p1, sve2p1_eorqv)
DO_ZPZ(ANDQV, aa64_sme2p1_or_sve2p1, sve2p1_andqv)

static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL,                  gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
static gen_helper_gvec_3 * const fabs_ah_fns[4] = {
    NULL,                     gen_helper_sve_ah_fabs_h,
    gen_helper_sve_ah_fabs_s, gen_helper_sve_ah_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz,
           s->fpcr_ah ? fabs_ah_fns[a->esz] : fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL,                  gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
static gen_helper_gvec_3 * const fneg_ah_fns[4] = {
    NULL,                     gen_helper_sve_ah_fneg_h,
    gen_helper_sve_ah_fneg_s, gen_helper_sve_ah_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz,
           s->fpcr_ah ? fneg_ah_fns[a->esz] : fneg_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL,                  gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxtb_fns[4] = {
    NULL,                  gen_helper_sve_uxtb_h,
    gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
};
TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxth_fns[4] = {
    NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
};
TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxth_fns[4] = {
    NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
};
TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)

TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)

static gen_helper_gvec_3 * const addqv_fns[4] = {
    gen_helper_sve2p1_addqv_b, gen_helper_sve2p1_addqv_h,
    gen_helper_sve2p1_addqv_s, gen_helper_sve2p1_addqv_d,
};
TRANS_FEAT(ADDQV, aa64_sme2p1_or_sve2p1,
           gen_gvec_ool_arg_zpz, addqv_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const smaxqv_fns[4] = {
    gen_helper_sve2p1_smaxqv_b, gen_helper_sve2p1_smaxqv_h,
    gen_helper_sve2p1_smaxqv_s, gen_helper_sve2p1_smaxqv_d,
};
TRANS_FEAT(SMAXQV, aa64_sme2p1_or_sve2p1,
           gen_gvec_ool_arg_zpz, smaxqv_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sminqv_fns[4] = {
    gen_helper_sve2p1_sminqv_b, gen_helper_sve2p1_sminqv_h,
    gen_helper_sve2p1_sminqv_s, gen_helper_sve2p1_sminqv_d,
};
TRANS_FEAT(SMINQV, aa64_sme2p1_or_sve2p1,
           gen_gvec_ool_arg_zpz, sminqv_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const umaxqv_fns[4] = {
    gen_helper_sve2p1_umaxqv_b, gen_helper_sve2p1_umaxqv_h,
    gen_helper_sve2p1_umaxqv_s, gen_helper_sve2p1_umaxqv_d,
};
TRANS_FEAT(UMAXQV, aa64_sme2p1_or_sve2p1,
           gen_gvec_ool_arg_zpz, umaxqv_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uminqv_fns[4] = {
    gen_helper_sve2p1_uminqv_b, gen_helper_sve2p1_uminqv_h,
    gen_helper_sve2p1_uminqv_s, gen_helper_sve2p1_uminqv_d,
};
TRANS_FEAT(UMINQV, aa64_sme2p1_or_sve2p1,
           gen_gvec_ool_arg_zpz, uminqv_fns[a->esz], a, 0)

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
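
    /* Write the 64-bit scalar result into Vd, zeroing the high bits. */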
    write_fp_dreg(s, a->rd, temp);
    return true;
}

#define DO_VPZ(NAME, name) \
    static gen_helper_gvec_reduc * const name##_fns[4] = {               \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static gen_helper_gvec_reduc * const saddv_fns[4] = {
    gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
    gen_helper_sve_saddv_s, NULL
};
TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
}

static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
                          gen_helper_gvec_3 * const fns[4])
{
    int max;

    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }

    /*
     * Shift by element size is architecturally valid.
     * For arithmetic right-shift, it's the same as by one less.
     * For logical shifts and ASRD, it is a zeroing operation.
     */
    max = 8 << a->esz;
    if (a->imm >= max) {
        if (asr) {
            a->imm = max - 1;
        } else {
            return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
        }
    }
    return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
}

static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)

static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {               \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s, NULL                              \
    };                                                                    \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,              \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)

#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {                \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                      \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])

/*
 *** SVE Index Generation Group
 */
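
/* INDEX: Zd.<T>[i] = start + i * incr, for each element i. */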
static bool do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz;
    TCGv_i32 desc;
    TCGv_ptr t_zd;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);
    }
    return true;
}

TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), cpu_reg(s, a->rm))

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
    }
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
    }
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
    }
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */
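
/*
 * ADR: Zd = Zn + (Zm << imm), element-wise.  The p32/p64 helpers work
 * on packed 32-/64-bit elements; the s32/u32 forms sign- or zero-extend
 * 32-bit offsets within 64-bit elements.
 */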
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL,                   gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
                        fexpa_fns[a->esz], a->rd, a->rn, s->fpcr_ah)

static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL,                    gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
                        ftssel_fns[a->esz], a, s->fpcr_ah)

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, tcg_env, nofs);
        tcg_gen_ld_i64(pm, tcg_env, mofs);
        tcg_gen_ld_i64(pg, tcg_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, tcg_env, dofs);

        do_predtest1(pd, pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s) {
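        /*
         * Without flag setting, degenerate cases simplify:
         * Pn & Pn & Pg == Pn & Pg, and when Pg == Pn that is just Pn.
         */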
1369         if (a->rn == a->rm) {
1370             if (a->pg == a->rn) {
1371                 return do_mov_p(s, a->rd, a->rn);
1372             }
1373             return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
1374         } else if (a->pg == a->rn || a->pg == a->rm) {
1375             return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
1376         }
1377     }
1378     return do_pppp_flags(s, a, &op);
1379 }
1380 
1381 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1382 {
1383     tcg_gen_andc_i64(pd, pn, pm);
1384     tcg_gen_and_i64(pd, pd, pg);
1385 }
1386 
1387 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1388                            TCGv_vec pm, TCGv_vec pg)
1389 {
1390     tcg_gen_andc_vec(vece, pd, pn, pm);
1391     tcg_gen_and_vec(vece, pd, pd, pg);
1392 }
1393 
1394 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1395 {
1396     static const GVecGen4 op = {
1397         .fni8 = gen_bic_pg_i64,
1398         .fniv = gen_bic_pg_vec,
1399         .fno = gen_helper_sve_bic_pppp,
1400         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1401     };
1402 
1403     if (!dc_isar_feature(aa64_sve, s)) {
1404         return false;
1405     }
1406     if (!a->s && a->pg == a->rn) {
1407         return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
1408     }
1409     return do_pppp_flags(s, a, &op);
1410 }
1411 
1412 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1413 {
1414     tcg_gen_xor_i64(pd, pn, pm);
1415     tcg_gen_and_i64(pd, pd, pg);
1416 }
1417 
1418 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1419                            TCGv_vec pm, TCGv_vec pg)
1420 {
1421     tcg_gen_xor_vec(vece, pd, pn, pm);
1422     tcg_gen_and_vec(vece, pd, pd, pg);
1423 }
1424 
1425 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1426 {
1427     static const GVecGen4 op = {
1428         .fni8 = gen_eor_pg_i64,
1429         .fniv = gen_eor_pg_vec,
1430         .fno = gen_helper_sve_eor_pppp,
1431         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1432     };
1433 
1434     if (!dc_isar_feature(aa64_sve, s)) {
1435         return false;
1436     }
1437     /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
1438     if (!a->s && a->pg == a->rm) {
1439         return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
1440     }
1441     return do_pppp_flags(s, a, &op);
1442 }
1443 
1444 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1445 {
1446     if (a->s || !dc_isar_feature(aa64_sve, s)) {
1447         return false;
1448     }
1449     if (sve_access_check(s)) {
1450         unsigned psz = pred_gvec_reg_size(s);
1451         tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1452                             pred_full_reg_offset(s, a->pg),
1453                             pred_full_reg_offset(s, a->rn),
1454                             pred_full_reg_offset(s, a->rm), psz, psz);
1455     }
1456     return true;
1457 }
1458 
1459 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1460 {
1461     tcg_gen_or_i64(pd, pn, pm);
1462     tcg_gen_and_i64(pd, pd, pg);
1463 }
1464 
1465 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1466                            TCGv_vec pm, TCGv_vec pg)
1467 {
1468     tcg_gen_or_vec(vece, pd, pn, pm);
1469     tcg_gen_and_vec(vece, pd, pd, pg);
1470 }
1471 
1472 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1473 {
1474     static const GVecGen4 op = {
1475         .fni8 = gen_orr_pg_i64,
1476         .fniv = gen_orr_pg_vec,
1477         .fno = gen_helper_sve_orr_pppp,
1478         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1479     };
1480 
1481     if (!dc_isar_feature(aa64_sve, s)) {
1482         return false;
1483     }
1484     if (!a->s && a->pg == a->rn && a->rn == a->rm) {
1485         return do_mov_p(s, a->rd, a->rn);
1486     }
1487     return do_pppp_flags(s, a, &op);
1488 }
1489 
1490 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1491 {
1492     tcg_gen_orc_i64(pd, pn, pm);
1493     tcg_gen_and_i64(pd, pd, pg);
1494 }
1495 
1496 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1497                            TCGv_vec pm, TCGv_vec pg)
1498 {
1499     tcg_gen_orc_vec(vece, pd, pn, pm);
1500     tcg_gen_and_vec(vece, pd, pd, pg);
1501 }
1502 
1503 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1504 {
1505     static const GVecGen4 op = {
1506         .fni8 = gen_orn_pg_i64,
1507         .fniv = gen_orn_pg_vec,
1508         .fno = gen_helper_sve_orn_pppp,
1509         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1510     };
1511 
1512     if (!dc_isar_feature(aa64_sve, s)) {
1513         return false;
1514     }
1515     return do_pppp_flags(s, a, &op);
1516 }
1517 
1518 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1519 {
1520     tcg_gen_or_i64(pd, pn, pm);
1521     tcg_gen_andc_i64(pd, pg, pd);
1522 }
1523 
1524 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1525                            TCGv_vec pm, TCGv_vec pg)
1526 {
1527     tcg_gen_or_vec(vece, pd, pn, pm);
1528     tcg_gen_andc_vec(vece, pd, pg, pd);
1529 }
1530 
1531 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1532 {
1533     static const GVecGen4 op = {
1534         .fni8 = gen_nor_pg_i64,
1535         .fniv = gen_nor_pg_vec,
1536         .fno = gen_helper_sve_nor_pppp,
1537         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1538     };
1539 
1540     if (!dc_isar_feature(aa64_sve, s)) {
1541         return false;
1542     }
1543     return do_pppp_flags(s, a, &op);
1544 }
1545 
1546 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1547 {
1548     tcg_gen_and_i64(pd, pn, pm);
1549     tcg_gen_andc_i64(pd, pg, pd);
1550 }
1551 
1552 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1553                            TCGv_vec pm, TCGv_vec pg)
1554 {
1555     tcg_gen_and_vec(vece, pd, pn, pm);
1556     tcg_gen_andc_vec(vece, pd, pg, pd);
1557 }
1558 
1559 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1560 {
1561     static const GVecGen4 op = {
1562         .fni8 = gen_nand_pg_i64,
1563         .fniv = gen_nand_pg_vec,
1564         .fno = gen_helper_sve_nand_pppp,
1565         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1566     };
1567 
1568     if (!dc_isar_feature(aa64_sve, s)) {
1569         return false;
1570     }
1571     return do_pppp_flags(s, a, &op);
1572 }
1573 
1574 /*
1575  *** SVE Predicate Misc Group
1576  */
1577 
1578 static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
1579 {
1580     if (!dc_isar_feature(aa64_sve, s)) {
1581         return false;
1582     }
1583     if (sve_access_check(s)) {
1584         int nofs = pred_full_reg_offset(s, a->rn);
1585         int gofs = pred_full_reg_offset(s, a->pg);
1586         int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1587 
1588         if (words == 1) {
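            /* The whole predicate fits in a single 64-bit word. */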
1589             TCGv_i64 pn = tcg_temp_new_i64();
1590             TCGv_i64 pg = tcg_temp_new_i64();
1591 
1592             tcg_gen_ld_i64(pn, tcg_env, nofs);
1593             tcg_gen_ld_i64(pg, tcg_env, gofs);
1594             do_predtest1(pn, pg);
1595         } else {
1596             do_predtest(s, nofs, gofs, words);
1597         }
1598     }
1599     return true;
1600 }
1601 
1602 /* See the ARM pseudocode DecodePredCount.  */
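/*
 * For example, a 32-byte vector with esz == MO_16 has 16 elements:
 * POW2 -> 16, VL7 -> 7, VL32 -> 0 (does not fit), MUL3 -> 15, ALL -> 16.
 */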
1603 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1604 {
1605     unsigned elements = fullsz >> esz;
1606     unsigned bound;
1607 
1608     switch (pattern) {
1609     case 0x0: /* POW2 */
1610         return pow2floor(elements);
1611     case 0x1: /* VL1 */
1612     case 0x2: /* VL2 */
1613     case 0x3: /* VL3 */
1614     case 0x4: /* VL4 */
1615     case 0x5: /* VL5 */
1616     case 0x6: /* VL6 */
1617     case 0x7: /* VL7 */
1618     case 0x8: /* VL8 */
1619         bound = pattern;
1620         break;
1621     case 0x9: /* VL16 */
1622     case 0xa: /* VL32 */
1623     case 0xb: /* VL64 */
1624     case 0xc: /* VL128 */
1625     case 0xd: /* VL256 */
1626         bound = 16 << (pattern - 9);
1627         break;
1628     case 0x1d: /* MUL4 */
1629         return elements - elements % 4;
1630     case 0x1e: /* MUL3 */
1631         return elements - elements % 3;
1632     case 0x1f: /* ALL */
1633         return elements;
1634     default:   /* #uimm5 */
1635         return 0;
1636     }
1637     return elements >= bound ? bound : 0;
1638 }
1639 
/* This handles all of the predicate initialization instructions:
1641  * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
1642  * so that decode_pred_count returns 0.  For SETFFR, we will have
1643  * set RD == 16 == FFR.
1644  */
1645 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1646 {
1647     if (!sve_access_check(s)) {
1648         return true;
1649     }
1650 
1651     unsigned fullsz = vec_full_reg_size(s);
1652     unsigned ofs = pred_full_reg_offset(s, rd);
1653     unsigned numelem, setsz, i;
1654     uint64_t word, lastword;
1655     TCGv_i64 t;
1656 
1657     numelem = decode_pred_count(fullsz, pat, esz);
1658 
1659     /* Determine what we must store into each bit, and how many.  */
1660     if (numelem == 0) {
1661         lastword = word = 0;
1662         setsz = fullsz;
1663     } else {
1664         setsz = numelem << esz;
1665         lastword = word = pred_esz_masks[esz];
1666         if (setsz % 64) {
1667             lastword &= MAKE_64BIT_MASK(0, setsz % 64);
1668         }
1669     }
1670 
1671     t = tcg_temp_new_i64();
1672     if (fullsz <= 64) {
1673         tcg_gen_movi_i64(t, lastword);
1674         tcg_gen_st_i64(t, tcg_env, ofs);
1675         goto done;
1676     }
1677 
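    /*
     * All words to be stored are identical.  If setsz is also a whole
     * number of gvec words, a single dup_imm stores the body and
     * clears the tail between oprsz and maxsz.
     */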
1678     if (word == lastword) {
1679         unsigned maxsz = size_for_gvec(fullsz / 8);
1680         unsigned oprsz = size_for_gvec(setsz / 8);
1681 
1682         if (oprsz * 8 == setsz) {
1683             tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
1684             goto done;
1685         }
1686     }
1687 
1688     setsz /= 8;
1689     fullsz /= 8;
1690 
1691     tcg_gen_movi_i64(t, word);
1692     for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
1693         tcg_gen_st_i64(t, tcg_env, ofs + i);
1694     }
1695     if (lastword != word) {
1696         tcg_gen_movi_i64(t, lastword);
1697         tcg_gen_st_i64(t, tcg_env, ofs + i);
1698         i += 8;
1699     }
1700     if (i < fullsz) {
1701         tcg_gen_movi_i64(t, 0);
1702         for (; i < fullsz; i += 8) {
1703             tcg_gen_st_i64(t, tcg_env, ofs + i);
1704         }
1705     }
1706 
1707  done:
    /* PTRUES: also set NZCV, as for a PTEST of the result.  */
1709     if (setflag) {
1710         tcg_gen_movi_i32(cpu_NF, -(word != 0));
1711         tcg_gen_movi_i32(cpu_CF, word == 0);
1712         tcg_gen_movi_i32(cpu_VF, 0);
1713         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1714     }
1715     return true;
1716 }
1717 
1718 TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)
1719 
1720 static bool trans_PTRUE_cnt(DisasContext *s, arg_PTRUE_cnt *a)
1721 {
1722     if (!dc_isar_feature(aa64_sme2_or_sve2p1, s)) {
1723         return false;
1724     }
1725     if (sve_access_check(s)) {
        /* The canonical TRUE is a zero count with the invert bit set,
           plus the element size. */
1727         int val = (1 << 15) | (1 << a->esz);
1728 
1729         /* Write val to the first uint64_t; clear all of the rest. */
1730         tcg_gen_gvec_dup_imm(MO_64, pred_full_reg_offset(s, a->rd),
1731                              8, size_for_gvec(pred_full_reg_size(s)), val);
1732     }
1733     return true;
1734 }
1735 
1736 /* Note pat == 31 is #all, to set all elements.  */
1737 TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
1738                         do_predset, 0, FFR_PRED_NUM, 31, false)
1739 
1740 /* Note pat == 32 is #unimp, to set no elements.  */
1741 TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
1742 
1743 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
1744 {
    /* The path through do_pppp_flags is complicated enough that we want
     * to avoid duplicating it.  Frob the arguments into the form of a
     * predicated AND.
     */
1748     arg_rprr_s alt_a = {
1749         .rd = a->rd, .pg = a->pg, .s = a->s,
1750         .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1751     };
1752 
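    /* FFR does not exist in streaming mode, absent FEAT_SME_FA64. */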
1753     s->is_nonstreaming = true;
1754     return trans_AND_pppp(s, &alt_a);
1755 }
1756 
1757 TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
1758 TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
1759 
1760 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1761                             void (*gen_fn)(TCGv_i32, TCGv_ptr,
1762                                            TCGv_ptr, TCGv_i32))
1763 {
1764     if (!sve_access_check(s)) {
1765         return true;
1766     }
1767 
1768     TCGv_ptr t_pd = tcg_temp_new_ptr();
1769     TCGv_ptr t_pg = tcg_temp_new_ptr();
1770     TCGv_i32 t;
1771     unsigned desc = 0;
1772 
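    /* Encode the exact predicate size and element size in the descriptor. */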
1773     desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
1774     desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
1775 
1776     tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, a->rd));
1777     tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->rn));
1778     t = tcg_temp_new_i32();
1779 
1780     gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
1781 
1782     do_pred_flags(t);
1783     return true;
1784 }
1785 
1786 TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
1787 TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)
1788 
1789 /*
1790  *** SVE Element Count Group
1791  */
1792 
1793 /* Perform an inline saturating addition of a 32-bit value within
1794  * a 64-bit register.  The second operand is known to be positive,
1795  * which halves the comparisons we must perform to bound the result.
1796  */
1797 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1798 {
1799     int64_t ibound;
1800 
1801     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1802     if (u) {
1803         tcg_gen_ext32u_i64(reg, reg);
1804     } else {
1805         tcg_gen_ext32s_i64(reg, reg);
1806     }
1807     if (d) {
1808         tcg_gen_sub_i64(reg, reg, val);
1809         ibound = (u ? 0 : INT32_MIN);
1810         tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
1811     } else {
1812         tcg_gen_add_i64(reg, reg, val);
1813         ibound = (u ? UINT32_MAX : INT32_MAX);
1814         tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
1815     }
1816 }
1817 
1818 /* Similarly with 64-bit values.  */
1819 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1820 {
1821     TCGv_i64 t0 = tcg_temp_new_i64();
1822     TCGv_i64 t2;
1823 
1824     if (u) {
1825         if (d) {
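            /* Unsigned saturating subtract: select 0 when reg < val. */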
1826             tcg_gen_sub_i64(t0, reg, val);
1827             t2 = tcg_constant_i64(0);
1828             tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
1829         } else {
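            /*
             * Unsigned saturating add: the addition overflows iff the
             * result is less than either operand; select -1 then.
             */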
1830             tcg_gen_add_i64(t0, reg, val);
1831             t2 = tcg_constant_i64(-1);
1832             tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
1833         }
1834     } else {
1835         TCGv_i64 t1 = tcg_temp_new_i64();
1836         if (d) {
1837             /* Detect signed overflow for subtraction.  */
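            /*
             * After the xors, t0 is negative iff the operands differed
             * in sign and the result differs in sign from reg, i.e.
             * iff the subtraction overflowed.  Since val is known
             * nonnegative, overflow can only be toward INT64_MIN.
             */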
1838             tcg_gen_xor_i64(t0, reg, val);
1839             tcg_gen_sub_i64(t1, reg, val);
1840             tcg_gen_xor_i64(reg, reg, t1);
1841             tcg_gen_and_i64(t0, t0, reg);
1842 
1843             /* Bound the result.  */
1844             tcg_gen_movi_i64(reg, INT64_MIN);
1845             t2 = tcg_constant_i64(0);
1846             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1847         } else {
1848             /* Detect signed overflow for addition.  */
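            /*
             * After the xors, t0 is negative iff the operands had the
             * same sign and the result differs from it, i.e. iff the
             * addition overflowed.
             */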
1849             tcg_gen_xor_i64(t0, reg, val);
1850             tcg_gen_add_i64(reg, reg, val);
1851             tcg_gen_xor_i64(t1, reg, val);
1852             tcg_gen_andc_i64(t0, t1, t0);
1853 
1854             /* Bound the result.  */
1855             tcg_gen_movi_i64(t1, INT64_MAX);
1856             t2 = tcg_constant_i64(0);
1857             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1858         }
1859     }
1860 }
1861 
1862 /* Similarly with a vector and a scalar operand.  */
1863 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1864                               TCGv_i64 val, bool u, bool d)
1865 {
1866     unsigned vsz = vec_full_reg_size(s);
1867     TCGv_ptr dptr, nptr;
1868     TCGv_i32 t32, desc;
1869     TCGv_i64 t64;
1870 
1871     dptr = tcg_temp_new_ptr();
1872     nptr = tcg_temp_new_ptr();
1873     tcg_gen_addi_ptr(dptr, tcg_env, vec_full_reg_offset(s, rd));
1874     tcg_gen_addi_ptr(nptr, tcg_env, vec_full_reg_offset(s, rn));
1875     desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
1876 
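    /*
     * The helpers take a signed displacement, so a decrement is passed
     * as the negation of the (positive) value.  Only unsigned 64-bit
     * needs dedicated subtract helpers, as the negation might not be
     * representable there.
     */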
1877     switch (esz) {
1878     case MO_8:
1879         t32 = tcg_temp_new_i32();
1880         tcg_gen_extrl_i64_i32(t32, val);
1881         if (d) {
1882             tcg_gen_neg_i32(t32, t32);
1883         }
1884         if (u) {
1885             gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1886         } else {
1887             gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1888         }
1889         break;
1890 
1891     case MO_16:
1892         t32 = tcg_temp_new_i32();
1893         tcg_gen_extrl_i64_i32(t32, val);
1894         if (d) {
1895             tcg_gen_neg_i32(t32, t32);
1896         }
1897         if (u) {
1898             gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1899         } else {
1900             gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1901         }
1902         break;
1903 
1904     case MO_32:
1905         t64 = tcg_temp_new_i64();
1906         if (d) {
1907             tcg_gen_neg_i64(t64, val);
1908         } else {
1909             tcg_gen_mov_i64(t64, val);
1910         }
1911         if (u) {
1912             gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1913         } else {
1914             gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1915         }
1916         break;
1917 
1918     case MO_64:
1919         if (u) {
1920             if (d) {
1921                 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1922             } else {
1923                 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1924             }
1925         } else if (d) {
1926             t64 = tcg_temp_new_i64();
1927             tcg_gen_neg_i64(t64, val);
1928             gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1929         } else {
1930             gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1931         }
1932         break;
1933 
1934     default:
1935         g_assert_not_reached();
1936     }
1937 }
1938 
1939 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1940 {
1941     if (!dc_isar_feature(aa64_sve, s)) {
1942         return false;
1943     }
1944     if (sve_access_check(s)) {
1945         unsigned fullsz = vec_full_reg_size(s);
1946         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1947         tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1948     }
1949     return true;
1950 }
1951 
1952 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
1953 {
1954     if (!dc_isar_feature(aa64_sve, s)) {
1955         return false;
1956     }
1957     if (sve_access_check(s)) {
1958         unsigned fullsz = vec_full_reg_size(s);
1959         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1960         int inc = numelem * a->imm * (a->d ? -1 : 1);
1961         TCGv_i64 reg = cpu_reg(s, a->rd);
1962 
1963         tcg_gen_addi_i64(reg, reg, inc);
1964     }
1965     return true;
1966 }
1967 
1968 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
1969 {
1970     if (!dc_isar_feature(aa64_sve, s)) {
1971         return false;
1972     }
1973     if (!sve_access_check(s)) {
1974         return true;
1975     }
1976 
1977     unsigned fullsz = vec_full_reg_size(s);
1978     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1979     int inc = numelem * a->imm;
1980     TCGv_i64 reg = cpu_reg(s, a->rd);
1981 
1982     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1983     if (inc == 0) {
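        /* Even with nothing to add, the result must still be extended. */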
1984         if (a->u) {
1985             tcg_gen_ext32u_i64(reg, reg);
1986         } else {
1987             tcg_gen_ext32s_i64(reg, reg);
1988         }
1989     } else {
1990         do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
1991     }
1992     return true;
1993 }
1994 
1995 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
1996 {
1997     if (!dc_isar_feature(aa64_sve, s)) {
1998         return false;
1999     }
2000     if (!sve_access_check(s)) {
2001         return true;
2002     }
2003 
2004     unsigned fullsz = vec_full_reg_size(s);
2005     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2006     int inc = numelem * a->imm;
2007     TCGv_i64 reg = cpu_reg(s, a->rd);
2008 
2009     if (inc != 0) {
2010         do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
2011     }
2012     return true;
2013 }
2014 
2015 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
2016 {
2017     if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
2018         return false;
2019     }
2020 
2021     unsigned fullsz = vec_full_reg_size(s);
2022     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2023     int inc = numelem * a->imm;
2024 
2025     if (inc != 0) {
2026         if (sve_access_check(s)) {
2027             tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
2028                               vec_full_reg_offset(s, a->rn),
2029                               tcg_constant_i64(a->d ? -inc : inc),
2030                               fullsz, fullsz);
2031         }
2032     } else {
2033         do_mov_z(s, a->rd, a->rn);
2034     }
2035     return true;
2036 }
2037 
2038 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
2039 {
2040     if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
2041         return false;
2042     }
2043 
2044     unsigned fullsz = vec_full_reg_size(s);
2045     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2046     int inc = numelem * a->imm;
2047 
2048     if (inc != 0) {
2049         if (sve_access_check(s)) {
2050             do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
2051                               tcg_constant_i64(inc), a->u, a->d);
2052         }
2053     } else {
2054         do_mov_z(s, a->rd, a->rn);
2055     }
2056     return true;
2057 }
2058 
2059 /*
2060  *** SVE Bitwise Immediate Group
2061  */
2062 
2063 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
2064 {
2065     uint64_t imm;
2066     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2067                                 extract32(a->dbm, 0, 6),
2068                                 extract32(a->dbm, 6, 6))) {
2069         return false;
2070     }
2071     return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
2072 }
2073 
2074 TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
2075 TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
2076 TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)
2077 
2078 static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
2079 {
2080     uint64_t imm;
2081 
2082     if (!dc_isar_feature(aa64_sve, s)) {
2083         return false;
2084     }
2085     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2086                                 extract32(a->dbm, 0, 6),
2087                                 extract32(a->dbm, 6, 6))) {
2088         return false;
2089     }
2090     if (sve_access_check(s)) {
2091         do_dupi_z(s, a->rd, imm);
2092     }
2093     return true;
2094 }
2095 
2096 /*
2097  *** SVE Integer Wide Immediate - Predicated Group
2098  */
2099 
2100 /* Implement all merging copies.  This is used for CPY (immediate),
2101  * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2102  */
2103 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
2104                      TCGv_i64 val)
2105 {
2106     typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2107     static gen_cpy * const fns[4] = {
2108         gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
2109         gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
2110     };
2111     unsigned vsz = vec_full_reg_size(s);
2112     TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
2113     TCGv_ptr t_zd = tcg_temp_new_ptr();
2114     TCGv_ptr t_zn = tcg_temp_new_ptr();
2115     TCGv_ptr t_pg = tcg_temp_new_ptr();
2116 
2117     tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd));
2118     tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, rn));
2119     tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
2120 
2121     fns[esz](t_zd, t_zn, t_pg, val, desc);
2122 }
2123 
2124 static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
2125 {
2126     if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
2127         return false;
2128     }
2129     if (sve_access_check(s)) {
2130         /* Decode the VFP immediate.  */
2131         uint64_t imm = vfp_expand_imm(a->esz, a->imm);
2132         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
2133     }
2134     return true;
2135 }
2136 
2137 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
2138 {
2139     if (!dc_isar_feature(aa64_sve, s)) {
2140         return false;
2141     }
2142     if (sve_access_check(s)) {
2143         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
2144     }
2145     return true;
2146 }
2147 
2148 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
2149 {
2150     static gen_helper_gvec_2i * const fns[4] = {
2151         gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
2152         gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
2153     };
2154 
2155     if (!dc_isar_feature(aa64_sve, s)) {
2156         return false;
2157     }
2158     if (sve_access_check(s)) {
2159         unsigned vsz = vec_full_reg_size(s);
2160         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
2161                             pred_full_reg_offset(s, a->pg),
2162                             tcg_constant_i64(a->imm),
2163                             vsz, vsz, 0, fns[a->esz]);
2164     }
2165     return true;
2166 }
2167 
2168 /*
2169  *** SVE Permute Extract Group
2170  */
2171 
2172 static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
2173 {
2174     if (!sve_access_check(s)) {
2175         return true;
2176     }
2177 
2178     unsigned vsz = vec_full_reg_size(s);
2179     unsigned n_ofs = imm >= vsz ? 0 : imm;
2180     unsigned n_siz = vsz - n_ofs;
2181     unsigned d = vec_full_reg_offset(s, rd);
2182     unsigned n = vec_full_reg_offset(s, rn);
2183     unsigned m = vec_full_reg_offset(s, rm);
2184 
2185     /* Use host vector move insns if we have appropriate sizes
2186      * and no unfortunate overlap.
2187      */
2188     if (m != d
2189         && n_ofs == size_for_gvec(n_ofs)
2190         && n_siz == size_for_gvec(n_siz)
2191         && (d != n || n_siz <= n_ofs)) {
2192         tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2193         if (n_ofs != 0) {
2194             tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2195         }
2196     } else {
2197         tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2198     }
2199     return true;
2200 }
2201 
2202 TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
2203 TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)
2204 
2205 static bool trans_EXTQ(DisasContext *s, arg_EXTQ *a)
2206 {
2207     unsigned vl, dofs, sofs0, sofs1, sofs2, imm;
2208 
2209     if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
2210         return false;
2211     }
2212     if (!sve_access_check(s)) {
2213         return true;
2214     }
2215 
2216     imm = a->imm;
2217     if (imm == 0) {
2218         /* So far we never optimize Zdn with MOVPRFX, so zd = zn is a nop. */
2219         return true;
2220     }
2221 
2222     vl = vec_full_reg_size(s);
2223     dofs = vec_full_reg_offset(s, a->rd);
2224     sofs2 = vec_full_reg_offset(s, a->rn);
2225 
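    /*
     * Per 16-byte segment, the result is bytes imm..imm+15 of the
     * 32-byte concatenation Zn:Zd (Zd low).  Each output double-word
     * is an extract2 of two adjacent source double-words; when
     * imm >= 8 the low double-word of Zd is not used at all.
     */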
2226     if (imm & 8) {
2227         sofs0 = dofs + 8;
2228         sofs1 = sofs2;
2229         sofs2 += 8;
2230     } else {
2231         sofs0 = dofs;
2232         sofs1 = dofs + 8;
2233     }
2234     imm = (imm & 7) << 3;
2235 
2236     for (unsigned i = 0; i < vl; i += 16) {
2237         TCGv_i64 s0 = tcg_temp_new_i64();
2238         TCGv_i64 s1 = tcg_temp_new_i64();
2239         TCGv_i64 s2 = tcg_temp_new_i64();
2240 
2241         tcg_gen_ld_i64(s0, tcg_env, sofs0 + i);
2242         tcg_gen_ld_i64(s1, tcg_env, sofs1 + i);
2243         tcg_gen_ld_i64(s2, tcg_env, sofs2 + i);
2244 
2245         tcg_gen_extract2_i64(s0, s0, s1, imm);
2246         tcg_gen_extract2_i64(s1, s1, s2, imm);
2247 
2248         tcg_gen_st_i64(s0, tcg_env, dofs + i);
2249         tcg_gen_st_i64(s1, tcg_env, dofs + i + 8);
2250     }
2251     return true;
2252 }
2253 
2254 /*
2255  *** SVE Permute - Unpredicated Group
2256  */
2257 
2258 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2259 {
2260     if (!dc_isar_feature(aa64_sve, s)) {
2261         return false;
2262     }
2263     if (sve_access_check(s)) {
2264         unsigned vsz = vec_full_reg_size(s);
2265         tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2266                              vsz, vsz, cpu_reg_sp(s, a->rn));
2267     }
2268     return true;
2269 }
2270 
2271 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
2272 {
2273     if (!dc_isar_feature(aa64_sve, s)) {
2274         return false;
2275     }
2276     if ((a->imm & 0x1f) == 0) {
2277         return false;
2278     }
2279     if (sve_access_check(s)) {
2280         unsigned vsz = vec_full_reg_size(s);
2281         unsigned dofs = vec_full_reg_offset(s, a->rd);
2282         unsigned esz, index;
2283 
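        /*
         * The immediate encodes both the element size, as the position
         * of the lowest set bit, and the index, in the bits above it.
         */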
2284         esz = ctz32(a->imm);
2285         index = a->imm >> (esz + 1);
2286 
2287         if ((index << esz) < vsz) {
2288             unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2289             tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2290         } else {
2291             /*
2292              * While dup_mem handles 128-bit elements, dup_imm does not.
2293              * Thankfully element size doesn't matter for splatting zero.
2294              */
2295             tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
2296         }
2297     }
2298     return true;
2299 }
2300 
2301 static bool trans_DUPQ(DisasContext *s, arg_DUPQ *a)
2302 {
2303     unsigned vl, dofs, nofs;
2304 
2305     if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
2306         return false;
2307     }
2308     if (!sve_access_check(s)) {
2309         return true;
2310     }
2311 
2312     vl = vec_full_reg_size(s);
2313     dofs = vec_full_reg_offset(s, a->rd);
2314     nofs = vec_reg_offset(s, a->rn, a->imm, a->esz);
2315 
2316     for (unsigned i = 0; i < vl; i += 16) {
2317         tcg_gen_gvec_dup_mem(a->esz, dofs + i, nofs + i, 16, 16);
2318     }
2319     return true;
2320 }
2321 
2322 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2323 {
2324     typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2325     static gen_insr * const fns[4] = {
2326         gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2327         gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2328     };
2329     unsigned vsz = vec_full_reg_size(s);
2330     TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
2331     TCGv_ptr t_zd = tcg_temp_new_ptr();
2332     TCGv_ptr t_zn = tcg_temp_new_ptr();
2333 
2334     tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, a->rd));
2335     tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
2336 
2337     fns[a->esz](t_zd, t_zn, val, desc);
2338 }
2339 
2340 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2341 {
2342     if (!dc_isar_feature(aa64_sve, s)) {
2343         return false;
2344     }
2345     if (sve_access_check(s)) {
2346         TCGv_i64 t = tcg_temp_new_i64();
2347         tcg_gen_ld_i64(t, tcg_env, vec_reg_offset(s, a->rm, 0, MO_64));
2348         do_insr_i64(s, a, t);
2349     }
2350     return true;
2351 }
2352 
2353 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2354 {
2355     if (!dc_isar_feature(aa64_sve, s)) {
2356         return false;
2357     }
2358     if (sve_access_check(s)) {
2359         do_insr_i64(s, a, cpu_reg(s, a->rm));
2360     }
2361     return true;
2362 }
2363 
2364 static gen_helper_gvec_2 * const rev_fns[4] = {
2365     gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2366     gen_helper_sve_rev_s, gen_helper_sve_rev_d
2367 };
2368 TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)
2369 
2370 static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
2371     gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2372     gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2373 };
2374 TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)
2375 
2376 static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
2377     gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
2378     gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
2379 };
2380 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
2381            a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)
2382 
2383 static gen_helper_gvec_3 * const tblq_fns[4] = {
2384     gen_helper_sve2p1_tblq_b, gen_helper_sve2p1_tblq_h,
2385     gen_helper_sve2p1_tblq_s, gen_helper_sve2p1_tblq_d
2386 };
2387 TRANS_FEAT(TBLQ, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
2388            tblq_fns[a->esz], a, 0)
2389 
2390 static gen_helper_gvec_3 * const tbx_fns[4] = {
2391     gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
2392     gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
2393 };
2394 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
2395 
2396 static gen_helper_gvec_3 * const tbxq_fns[4] = {
2397     gen_helper_sve2p1_tbxq_b, gen_helper_sve2p1_tbxq_h,
2398     gen_helper_sve2p1_tbxq_s, gen_helper_sve2p1_tbxq_d
2399 };
2400 TRANS_FEAT(TBXQ, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
2401            tbxq_fns[a->esz], a, 0)
2402 
2403 static bool trans_PMOV_pv(DisasContext *s, arg_PMOV_pv *a)
2404 {
2405     static gen_helper_gvec_2 * const fns[4] = {
2406         NULL,                 gen_helper_pmov_pv_h,
2407         gen_helper_pmov_pv_s, gen_helper_pmov_pv_d
2408     };
2409     unsigned vl, pl, vofs, pofs;
2410     TCGv_i64 tmp;
2411 
2412     if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
2413         return false;
2414     }
2415     if (!sve_access_check(s)) {
2416         return true;
2417     }
2418 
2419     vl = vec_full_reg_size(s);
2420     if (a->esz != MO_8) {
2421         tcg_gen_gvec_2_ool(pred_full_reg_offset(s, a->rd),
2422                            vec_full_reg_offset(s, a->rn),
2423                            vl, vl, a->imm, fns[a->esz]);
2424         return true;
2425     }
2426 
2427     /*
2428      * Copy the low PL bytes from vector Zn, zero-extending to a
2429      * multiple of 8 bytes, so that Pd is properly cleared.
2430      */
2431 
2432     pl = vl / 8;
2433     pofs = pred_full_reg_offset(s, a->rd);
2434     vofs = vec_full_reg_offset(s, a->rn);
2435 
2436     QEMU_BUILD_BUG_ON(sizeof(ARMPredicateReg) != 32);
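    /*
     * pl is a multiple of 2.  Copy the multiple-of-8 portion in
     * power-of-two chunks; the switch below zero-extends any
     * remaining 2, 4 or 6 bytes.
     */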
2437     for (unsigned i = 32; i >= 8; i >>= 1) {
2438         if (pl & i) {
2439             tcg_gen_gvec_mov(MO_64, pofs, vofs, i, i);
2440             pofs += i;
2441             vofs += i;
2442         }
2443     }
2444     switch (pl & 7) {
2445     case 0:
2446         return true;
2447     case 2:
2448         tmp = tcg_temp_new_i64();
2449         tcg_gen_ld16u_i64(tmp, tcg_env, vofs + (HOST_BIG_ENDIAN ? 6 : 0));
2450         break;
2451     case 4:
2452         tmp = tcg_temp_new_i64();
2453         tcg_gen_ld32u_i64(tmp, tcg_env, vofs + (HOST_BIG_ENDIAN ? 4 : 0));
2454         break;
2455     case 6:
2456         tmp = tcg_temp_new_i64();
2457         tcg_gen_ld_i64(tmp, tcg_env, vofs);
2458         tcg_gen_extract_i64(tmp, tmp, 0, 48);
2459         break;
2460     default:
2461         g_assert_not_reached();
2462     }
2463     tcg_gen_st_i64(tmp, tcg_env, pofs);
2464     return true;
2465 }
2466 
2467 static bool trans_PMOV_vp(DisasContext *s, arg_PMOV_pv *a)
2468 {
2469     static gen_helper_gvec_2 * const fns[4] = {
2470         NULL,                 gen_helper_pmov_vp_h,
2471         gen_helper_pmov_vp_s, gen_helper_pmov_vp_d
2472     };
2473     unsigned vl;
2474 
2475     if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
2476         return false;
2477     }
2478     if (!sve_access_check(s)) {
2479         return true;
2480     }
2481 
2482     vl = vec_full_reg_size(s);
2483 
2484     if (a->esz == MO_8) {
2485         /*
2486          * The low PL bytes are copied from Pn to Zd unchanged.
2487          * We know that the unused portion of Pn is zero, and
2488          * that imm == 0, so the balance of Zd must be zeroed.
2489          */
2490         tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, a->rd),
2491                          pred_full_reg_offset(s, a->rn),
2492                          size_for_gvec(vl / 8), vl);
2493     } else {
2494         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2495                            pred_full_reg_offset(s, a->rn),
2496                            vl, vl, a->imm, fns[a->esz]);
2497     }
2498     return true;
2499 }
2500 
2501 static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
2502 {
2503     static gen_helper_gvec_2 * const fns[4][2] = {
2504         { NULL, NULL },
2505         { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2506         { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2507         { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2508     };
2509 
2510     if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
2511         return false;
2512     }
2513     if (sve_access_check(s)) {
2514         unsigned vsz = vec_full_reg_size(s);
2515         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2516                            vec_full_reg_offset(s, a->rn)
2517                            + (a->h ? vsz / 2 : 0),
2518                            vsz, vsz, 0, fns[a->esz][a->u]);
2519     }
2520     return true;
2521 }
2522 
2523 /*
2524  *** SVE Permute - Predicates Group
2525  */
2526 
2527 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2528                           gen_helper_gvec_3 *fn)
2529 {
2530     if (!sve_access_check(s)) {
2531         return true;
2532     }
2533 
2534     unsigned vsz = pred_full_reg_size(s);
2535 
2536     TCGv_ptr t_d = tcg_temp_new_ptr();
2537     TCGv_ptr t_n = tcg_temp_new_ptr();
2538     TCGv_ptr t_m = tcg_temp_new_ptr();
2539     uint32_t desc = 0;
2540 
2541     desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2542     desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2543     desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
2544 
2545     tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd));
2546     tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn));
2547     tcg_gen_addi_ptr(t_m, tcg_env, pred_full_reg_offset(s, a->rm));
2548 
2549     fn(t_d, t_n, t_m, tcg_constant_i32(desc));
2550     return true;
2551 }
2552 
2553 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2554                           gen_helper_gvec_2 *fn)
2555 {
2556     if (!sve_access_check(s)) {
2557         return true;
2558     }
2559 
2560     unsigned vsz = pred_full_reg_size(s);
2561     TCGv_ptr t_d = tcg_temp_new_ptr();
2562     TCGv_ptr t_n = tcg_temp_new_ptr();
2563     uint32_t desc = 0;
2564 
2565     tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd));
2566     tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn));
2567 
2568     desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2569     desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2570     desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
2571 
2572     fn(t_d, t_n, tcg_constant_i32(desc));
2573     return true;
2574 }
2575 
2576 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p)
2577 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p)
2578 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p)
2579 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p)
2580 TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
2581 TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)
2582 
2583 TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
2584 TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
2585 TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)
2586 
2587 /*
2588  *** SVE Permute - Interleaving Group
2589  */
2590 
2591 static bool do_interleave_q(DisasContext *s, gen_helper_gvec_3 *fn,
2592                             arg_rrr_esz *a, int data)
2593 {
2594     if (sve_access_check(s)) {
2595         unsigned vsz = vec_full_reg_size(s);
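        /* The 128-bit interleaves require at least two quadwords. */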
2596         if (vsz < 32) {
2597             unallocated_encoding(s);
2598         } else {
2599             tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2600                                vec_full_reg_offset(s, a->rn),
2601                                vec_full_reg_offset(s, a->rm),
2602                                vsz, vsz, data, fn);
2603         }
2604     }
2605     return true;
2606 }
2607 
2608 static gen_helper_gvec_3 * const zip_fns[4] = {
2609     gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2610     gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2611 };
2612 TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2613            zip_fns[a->esz], a, 0)
2614 TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2615            zip_fns[a->esz], a, vec_full_reg_size(s) / 2)
2616 
2617 TRANS_FEAT_NONSTREAMING(ZIP1_q, aa64_sve_f64mm, do_interleave_q,
2618                         gen_helper_sve2_zip_q, a, 0)
2619 TRANS_FEAT_NONSTREAMING(ZIP2_q, aa64_sve_f64mm, do_interleave_q,
2620                         gen_helper_sve2_zip_q, a,
2621                         QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
2622 
2623 static gen_helper_gvec_3 * const zipq_fns[4] = {
2624     gen_helper_sve2p1_zipq_b, gen_helper_sve2p1_zipq_h,
2625     gen_helper_sve2p1_zipq_s, gen_helper_sve2p1_zipq_d,
2626 };
2627 TRANS_FEAT(ZIPQ1, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
2628            zipq_fns[a->esz], a, 0)
2629 TRANS_FEAT(ZIPQ2, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
2630            zipq_fns[a->esz], a, 16 / 2)
2631 
2632 static gen_helper_gvec_3 * const uzp_fns[4] = {
2633     gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2634     gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2635 };
2636 TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2637            uzp_fns[a->esz], a, 0)
2638 TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2639            uzp_fns[a->esz], a, 1 << a->esz)
2640 
2641 TRANS_FEAT_NONSTREAMING(UZP1_q, aa64_sve_f64mm, do_interleave_q,
2642                         gen_helper_sve2_uzp_q, a, 0)
2643 TRANS_FEAT_NONSTREAMING(UZP2_q, aa64_sve_f64mm, do_interleave_q,
2644                         gen_helper_sve2_uzp_q, a, 16)
2645 
2646 static gen_helper_gvec_3 * const uzpq_fns[4] = {
2647     gen_helper_sve2p1_uzpq_b, gen_helper_sve2p1_uzpq_h,
2648     gen_helper_sve2p1_uzpq_s, gen_helper_sve2p1_uzpq_d,
2649 };
2650 TRANS_FEAT(UZPQ1, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
2651            uzpq_fns[a->esz], a, 0)
2652 TRANS_FEAT(UZPQ2, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
2653            uzpq_fns[a->esz], a, 1 << a->esz)
2654 
2655 static gen_helper_gvec_3 * const trn_fns[4] = {
2656     gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2657     gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2658 };
2659 
2660 TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2661            trn_fns[a->esz], a, 0)
2662 TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2663            trn_fns[a->esz], a, 1 << a->esz)
2664 
2665 TRANS_FEAT_NONSTREAMING(TRN1_q, aa64_sve_f64mm, do_interleave_q,
2666                         gen_helper_sve2_trn_q, a, 0)
2667 TRANS_FEAT_NONSTREAMING(TRN2_q, aa64_sve_f64mm, do_interleave_q,
2668                         gen_helper_sve2_trn_q, a, 16)
2669 
2670 /*
2671  *** SVE Permute Vector - Predicated Group
2672  */
2673 
2674 static gen_helper_gvec_3 * const compact_fns[4] = {
2675     NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2676 };
2677 TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
2678                         compact_fns[a->esz], a, 0)
2679 
2680 /* Call the helper that computes the ARM LastActiveElement pseudocode
2681  * function, scaled by the element size.  This includes the not found
2682  * indication; e.g. not found for esz=3 is -8.
2683  */
2684 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2685 {
    /* Predicate sizes may be smaller than simd_desc's 8-byte granule,
     * so we cannot use it.  Nor can we round up, as we do elsewhere,
     * because we need the exact size.
     */
2689     TCGv_ptr t_p = tcg_temp_new_ptr();
2690     unsigned desc = 0;
2691 
2692     desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
2693     desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
2694 
2695     tcg_gen_addi_ptr(t_p, tcg_env, pred_full_reg_offset(s, pg));
2696 
2697     gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));
2698 }
2699 
2700 /* Increment LAST to the offset of the next element in the vector,
2701  * wrapping around to 0.
2702  */
2703 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2704 {
2705     unsigned vsz = vec_full_reg_size(s);
2706 
2707     tcg_gen_addi_i32(last, last, 1 << esz);
2708     if (is_power_of_2(vsz)) {
2709         tcg_gen_andi_i32(last, last, vsz - 1);
2710     } else {
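        /* Non-power-of-2 vector size: compare and wrap to 0 by hand. */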
2711         TCGv_i32 max = tcg_constant_i32(vsz);
2712         TCGv_i32 zero = tcg_constant_i32(0);
2713         tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2714     }
2715 }
2716 
2717 /* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2718 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2719 {
2720     unsigned vsz = vec_full_reg_size(s);
2721 
2722     if (is_power_of_2(vsz)) {
2723         tcg_gen_andi_i32(last, last, vsz - 1);
2724     } else {
2725         TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
2726         TCGv_i32 zero = tcg_constant_i32(0);
2727         tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2728     }
2729 }
2730 
2731 /* Load an unsigned element of ESZ from BASE+OFS.  */
2732 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2733 {
2734     TCGv_i64 r = tcg_temp_new_i64();
2735 
2736     switch (esz) {
2737     case 0:
2738         tcg_gen_ld8u_i64(r, base, ofs);
2739         break;
2740     case 1:
2741         tcg_gen_ld16u_i64(r, base, ofs);
2742         break;
2743     case 2:
2744         tcg_gen_ld32u_i64(r, base, ofs);
2745         break;
2746     case 3:
2747         tcg_gen_ld_i64(r, base, ofs);
2748         break;
2749     default:
2750         g_assert_not_reached();
2751     }
2752     return r;
2753 }
2754 
2755 /* Load an unsigned element of ESZ from RM[LAST].  */
2756 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2757                                  int rm, int esz)
2758 {
2759     TCGv_ptr p = tcg_temp_new_ptr();
2760 
    /* Convert the offset within the vector into an offset into ENV.
2762      * The final adjustment for the vector register base
2763      * is added via constant offset to the load.
2764      */
2765 #if HOST_BIG_ENDIAN
2766     /* Adjust for element ordering.  See vec_reg_offset.  */
2767     if (esz < 3) {
2768         tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2769     }
2770 #endif
2771     tcg_gen_ext_i32_ptr(p, last);
2772     tcg_gen_add_ptr(p, p, tcg_env);
2773 
2774     return load_esz(p, vec_full_reg_offset(s, rm), esz);
2775 }
2776 
2777 /* Compute CLAST for a Zreg.  */
2778 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2779 {
2780     TCGv_i32 last;
2781     TCGLabel *over;
2782     TCGv_i64 ele;
2783     unsigned vsz, esz = a->esz;
2784 
2785     if (!sve_access_check(s)) {
2786         return true;
2787     }
2788 
2789     last = tcg_temp_new_i32();
2790     over = gen_new_label();
2791 
2792     find_last_active(s, last, esz, a->pg);
2793 
2794     /* There is of course no movcond for a 2048-bit vector,
2795      * so we must branch over the actual store.
2796      */
2797     tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2798 
2799     if (!before) {
2800         incr_last_active(s, last, esz);
2801     }
2802 
2803     ele = load_last_active(s, last, a->rm, esz);
2804 
2805     vsz = vec_full_reg_size(s);
2806     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2807 
2808     /* If this insn used MOVPRFX, we may need a second move.  */
2809     if (a->rd != a->rn) {
2810         TCGLabel *done = gen_new_label();
2811         tcg_gen_br(done);
2812 
2813         gen_set_label(over);
2814         do_mov_z(s, a->rd, a->rn);
2815 
2816         gen_set_label(done);
2817     } else {
2818         gen_set_label(over);
2819     }
2820     return true;
2821 }
2822 
2823 TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
2824 TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)
2825 
2826 /* Compute CLAST for a scalar.  */
2827 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2828                             bool before, TCGv_i64 reg_val)
2829 {
2830     TCGv_i32 last = tcg_temp_new_i32();
2831     TCGv_i64 ele, cmp;
2832 
2833     find_last_active(s, last, esz, pg);
2834 
2835     /* Extend the original value of last prior to incrementing.  */
2836     cmp = tcg_temp_new_i64();
2837     tcg_gen_ext_i32_i64(cmp, last);
2838 
2839     if (!before) {
2840         incr_last_active(s, last, esz);
2841     }
2842 
2843     /* The conceit here is that while last < 0 indicates not found, after
2844      * adjusting for tcg_env->vfp.zregs[rm], it is still a valid address
2845      * from which we can load garbage.  We then discard the garbage with
2846      * a conditional move.
2847      */
2848     ele = load_last_active(s, last, rm, esz);
2849 
2850     tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
2851                         ele, reg_val);
2852 }
2853 
2854 /* Compute CLAST for a Vreg.  */
2855 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2856 {
2857     if (sve_access_check(s)) {
2858         int esz = a->esz;
2859         int ofs = vec_reg_offset(s, a->rd, 0, esz);
2860         TCGv_i64 reg = load_esz(tcg_env, ofs, esz);
2861 
2862         do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2863         write_fp_dreg(s, a->rd, reg);
2864     }
2865     return true;
2866 }
2867 
2868 TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
2869 TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)
2870 
2871 /* Compute CLAST for a Xreg.  */
2872 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2873 {
2874     TCGv_i64 reg;
2875 
2876     if (!sve_access_check(s)) {
2877         return true;
2878     }
2879 
2880     reg = cpu_reg(s, a->rd);
2881     switch (a->esz) {
2882     case 0:
2883         tcg_gen_ext8u_i64(reg, reg);
2884         break;
2885     case 1:
2886         tcg_gen_ext16u_i64(reg, reg);
2887         break;
2888     case 2:
2889         tcg_gen_ext32u_i64(reg, reg);
2890         break;
2891     case 3:
2892         break;
2893     default:
2894         g_assert_not_reached();
2895     }
2896 
2897     do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2898     return true;
2899 }
2900 
2901 TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
2902 TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)
2903 
2904 /* Compute LAST for a scalar.  */
2905 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2906                                int pg, int rm, bool before)
2907 {
2908     TCGv_i32 last = tcg_temp_new_i32();
2909 
2910     find_last_active(s, last, esz, pg);
2911     if (before) {
2912         wrap_last_active(s, last, esz);
2913     } else {
2914         incr_last_active(s, last, esz);
2915     }
2916 
2917     return load_last_active(s, last, rm, esz);
2918 }
2919 
2920 /* Compute LAST for a Vreg.  */
2921 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2922 {
2923     if (sve_access_check(s)) {
2924         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2925         write_fp_dreg(s, a->rd, val);
2926     }
2927     return true;
2928 }
2929 
2930 TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
2931 TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)
2932 
2933 /* Compute LAST for a Xreg.  */
2934 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2935 {
2936     if (sve_access_check(s)) {
2937         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2938         tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2939     }
2940     return true;
2941 }
2942 
2943 TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
2944 TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)
2945 
2946 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2947 {
2948     if (!dc_isar_feature(aa64_sve, s)) {
2949         return false;
2950     }
2951     if (sve_access_check(s)) {
2952         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2953     }
2954     return true;
2955 }
2956 
2957 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2958 {
2959     if (!dc_isar_feature(aa64_sve, s)) {
2960         return false;
2961     }
2962     if (sve_access_check(s)) {
2963         int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2964         TCGv_i64 t = load_esz(tcg_env, ofs, a->esz);
2965         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2966     }
2967     return true;
2968 }
2969 
2970 static gen_helper_gvec_3 * const revb_fns[4] = {
2971     NULL,                  gen_helper_sve_revb_h,
2972     gen_helper_sve_revb_s, gen_helper_sve_revb_d,
2973 };
2974 TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)
2975 
2976 static gen_helper_gvec_3 * const revh_fns[4] = {
2977     NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
2978 };
2979 TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
2980 
2981 TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
2982            a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
2983 
2984 TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)
2985 
2986 TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
2987            gen_helper_sve_splice, a, a->esz)
2988 
2989 TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
2990            a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)
2991 
2992 /*
2993  *** SVE Integer Compare - Vectors Group
2994  */
2995 
2996 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2997                           gen_helper_gvec_flags_4 *gen_fn)
2998 {
2999     TCGv_ptr pd, zn, zm, pg;
3000     unsigned vsz;
3001     TCGv_i32 t;
3002 
3003     if (gen_fn == NULL) {
3004         return false;
3005     }
3006     if (!sve_access_check(s)) {
3007         return true;
3008     }
3009 
3010     vsz = vec_full_reg_size(s);
3011     t = tcg_temp_new_i32();
3012     pd = tcg_temp_new_ptr();
3013     zn = tcg_temp_new_ptr();
3014     zm = tcg_temp_new_ptr();
3015     pg = tcg_temp_new_ptr();
3016 
3017     tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd));
3018     tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn));
3019     tcg_gen_addi_ptr(zm, tcg_env, vec_full_reg_offset(s, a->rm));
3020     tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg));
3021 
3022     gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));
3023 
3024     do_pred_flags(t);
3025     return true;
3026 }
3027 
3028 #define DO_PPZZ(NAME, name) \
3029     static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = {       \
3030         gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
3031         gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
3032     };                                                                  \
3033     TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags,                    \
3034                a, name##_ppzz_fns[a->esz])
3035 
3036 DO_PPZZ(CMPEQ, cmpeq)
3037 DO_PPZZ(CMPNE, cmpne)
3038 DO_PPZZ(CMPGT, cmpgt)
3039 DO_PPZZ(CMPGE, cmpge)
3040 DO_PPZZ(CMPHI, cmphi)
3041 DO_PPZZ(CMPHS, cmphs)
3042 
3043 #undef DO_PPZZ
3044 
3045 #define DO_PPZW(NAME, name) \
3046     static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = {       \
3047         gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
3048         gen_helper_sve_##name##_ppzw_s, NULL                            \
3049     };                                                                  \
3050     TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags,                    \
3051                a, name##_ppzw_fns[a->esz])
3052 
3053 DO_PPZW(CMPEQ, cmpeq)
3054 DO_PPZW(CMPNE, cmpne)
3055 DO_PPZW(CMPGT, cmpgt)
3056 DO_PPZW(CMPGE, cmpge)
3057 DO_PPZW(CMPHI, cmphi)
3058 DO_PPZW(CMPHS, cmphs)
3059 DO_PPZW(CMPLT, cmplt)
3060 DO_PPZW(CMPLE, cmple)
3061 DO_PPZW(CMPLO, cmplo)
3062 DO_PPZW(CMPLS, cmpls)
3063 
3064 #undef DO_PPZW
3065 
3066 /*
3067  *** SVE Integer Compare - Immediate Groups
3068  */
3069 
3070 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
3071                           gen_helper_gvec_flags_3 *gen_fn)
3072 {
3073     TCGv_ptr pd, zn, pg;
3074     unsigned vsz;
3075     TCGv_i32 t;
3076 
3077     if (gen_fn == NULL) {
3078         return false;
3079     }
3080     if (!sve_access_check(s)) {
3081         return true;
3082     }
3083 
3084     vsz = vec_full_reg_size(s);
3085     t = tcg_temp_new_i32();
3086     pd = tcg_temp_new_ptr();
3087     zn = tcg_temp_new_ptr();
3088     pg = tcg_temp_new_ptr();
3089 
3090     tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd));
3091     tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn));
3092     tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg));
3093 
3094     gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));
3095 
3096     do_pred_flags(t);
3097     return true;
3098 }
3099 
3100 #define DO_PPZI(NAME, name) \
3101     static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = {         \
3102         gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
3103         gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
3104     };                                                                    \
3105     TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a,                   \
3106                name##_ppzi_fns[a->esz])
3107 
3108 DO_PPZI(CMPEQ, cmpeq)
3109 DO_PPZI(CMPNE, cmpne)
3110 DO_PPZI(CMPGT, cmpgt)
3111 DO_PPZI(CMPGE, cmpge)
3112 DO_PPZI(CMPHI, cmphi)
3113 DO_PPZI(CMPHS, cmphs)
3114 DO_PPZI(CMPLT, cmplt)
3115 DO_PPZI(CMPLE, cmple)
3116 DO_PPZI(CMPLO, cmplo)
3117 DO_PPZI(CMPLS, cmpls)
3118 
3119 #undef DO_PPZI
3120 
3121 /*
3122  *** SVE Partition Break Group
3123  */
3124 
3125 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
3126                     gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
3127 {
3128     if (!sve_access_check(s)) {
3129         return true;
3130     }
3131 
3132     unsigned vsz = pred_full_reg_size(s);
3133 
    /* Predicate sizes may be too small for simd_desc; build the
       descriptor by hand.  */
3135     TCGv_ptr d = tcg_temp_new_ptr();
3136     TCGv_ptr n = tcg_temp_new_ptr();
3137     TCGv_ptr m = tcg_temp_new_ptr();
3138     TCGv_ptr g = tcg_temp_new_ptr();
3139     TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
3140 
3141     tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd));
3142     tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn));
3143     tcg_gen_addi_ptr(m, tcg_env, pred_full_reg_offset(s, a->rm));
3144     tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg));
3145 
3146     if (a->s) {
3147         TCGv_i32 t = tcg_temp_new_i32();
3148         fn_s(t, d, n, m, g, desc);
3149         do_pred_flags(t);
3150     } else {
3151         fn(d, n, m, g, desc);
3152     }
3153     return true;
3154 }
3155 
3156 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
3157                     gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
3158 {
3159     if (!sve_access_check(s)) {
3160         return true;
3161     }
3162 
3163     unsigned vsz = pred_full_reg_size(s);
3164 
    /* Predicate sizes may be too small for simd_desc; build the
       descriptor by hand.  */
3166     TCGv_ptr d = tcg_temp_new_ptr();
3167     TCGv_ptr n = tcg_temp_new_ptr();
3168     TCGv_ptr g = tcg_temp_new_ptr();
3169     TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
3170 
3171     tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd));
3172     tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn));
3173     tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg));
3174 
3175     if (a->s) {
3176         TCGv_i32 t = tcg_temp_new_i32();
3177         fn_s(t, d, n, g, desc);
3178         do_pred_flags(t);
3179     } else {
3180         fn(d, n, g, desc);
3181     }
3182     return true;
3183 }
3184 
3185 TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
3186            gen_helper_sve_brkpa, gen_helper_sve_brkpas)
3187 TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
3188            gen_helper_sve_brkpb, gen_helper_sve_brkpbs)
3189 
3190 TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
3191            gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
3192 TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
3193            gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)
3194 
3195 TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
3196            gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
3197 TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
3198            gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)
3199 
3200 TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
3201            gen_helper_sve_brkn, gen_helper_sve_brkns)
3202 
3203 /*
3204  *** SVE Predicate Count Group
3205  */
3206 
3207 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3208 {
3209     unsigned psz = pred_full_reg_size(s);
3210 
3211     if (psz <= 8) {
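        /* The whole predicate fits in one word: count inline. */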
3212         uint64_t psz_mask;
3213 
3214         tcg_gen_ld_i64(val, tcg_env, pred_full_reg_offset(s, pn));
3215         if (pn != pg) {
3216             TCGv_i64 g = tcg_temp_new_i64();
3217             tcg_gen_ld_i64(g, tcg_env, pred_full_reg_offset(s, pg));
3218             tcg_gen_and_i64(val, val, g);
3219         }
3220 
        /* Fold the predicate-size mask into the pred_esz_masks value,
         * simply to reduce the size of the code generated here.
         */
3224         psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3225         tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3226 
3227         tcg_gen_ctpop_i64(val, val);
3228     } else {
3229         TCGv_ptr t_pn = tcg_temp_new_ptr();
3230         TCGv_ptr t_pg = tcg_temp_new_ptr();
3231         unsigned desc = 0;
3232 
3233         desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
3234         desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
3235 
3236         tcg_gen_addi_ptr(t_pn, tcg_env, pred_full_reg_offset(s, pn));
3237         tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
3238 
3239         gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
3240     }
3241 }
3242 
3243 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
3244 {
3245     if (!dc_isar_feature(aa64_sve, s)) {
3246         return false;
3247     }
3248     if (sve_access_check(s)) {
3249         do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3250     }
3251     return true;
3252 }
3253 
3254 static bool trans_CNTP_c(DisasContext *s, arg_CNTP_c *a)
3255 {
3256     TCGv_i32 t_png;
3257     uint32_t desc = 0;
3258 
3259     if (dc_isar_feature(aa64_sve2p1, s)) {
3260         if (!sve_access_check(s)) {
3261             return true;
3262         }
3263     } else if (dc_isar_feature(aa64_sme2, s)) {
3264         if (!sme_sm_enabled_check(s)) {
3265             return true;
3266         }
3267     } else {
3268         return false;
3269     }
3270 
3271     t_png = tcg_temp_new_i32();
3272     tcg_gen_ld16u_i32(t_png, tcg_env,
3273                       pred_full_reg_offset(s, a->rn) ^
3274                       (HOST_BIG_ENDIAN ? 6 : 0));
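         /*
          * The predicate-as-counter value lives in the low 16 bits of the
          * backing uint64_t; on a big-endian host that 16-bit unit sits at
          * byte offset 6, hence the XOR above.
          */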
3275 
3276     desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
3277     desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3278     desc = FIELD_DP32(desc, PREDDESC, DATA, a->vl);
3279 
3280     gen_helper_sve2p1_cntp_c(cpu_reg(s, a->rd), t_png, tcg_constant_i32(desc));
3281     return true;
3282 }
3283 
3284 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3285 {
3286     if (!dc_isar_feature(aa64_sve, s)) {
3287         return false;
3288     }
3289     if (sve_access_check(s)) {
3290         TCGv_i64 reg = cpu_reg(s, a->rd);
3291         TCGv_i64 val = tcg_temp_new_i64();
3292 
3293         do_cntp(s, val, a->esz, a->pg, a->pg);
3294         if (a->d) {
3295             tcg_gen_sub_i64(reg, reg, val);
3296         } else {
3297             tcg_gen_add_i64(reg, reg, val);
3298         }
3299     }
3300     return true;
3301 }
3302 
3303 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3304 {
3305     if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
3306         return false;
3307     }
3308     if (sve_access_check(s)) {
3309         unsigned vsz = vec_full_reg_size(s);
3310         TCGv_i64 val = tcg_temp_new_i64();
3311         GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3312 
3313         do_cntp(s, val, a->esz, a->pg, a->pg);
3314         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3315                 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3316     }
3317     return true;
3318 }
3319 
3320 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3321 {
3322     if (!dc_isar_feature(aa64_sve, s)) {
3323         return false;
3324     }
3325     if (sve_access_check(s)) {
3326         TCGv_i64 reg = cpu_reg(s, a->rd);
3327         TCGv_i64 val = tcg_temp_new_i64();
3328 
3329         do_cntp(s, val, a->esz, a->pg, a->pg);
3330         do_sat_addsub_32(reg, val, a->u, a->d);
3331     }
3332     return true;
3333 }
3334 
3335 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3336 {
3337     if (!dc_isar_feature(aa64_sve, s)) {
3338         return false;
3339     }
3340     if (sve_access_check(s)) {
3341         TCGv_i64 reg = cpu_reg(s, a->rd);
3342         TCGv_i64 val = tcg_temp_new_i64();
3343 
3344         do_cntp(s, val, a->esz, a->pg, a->pg);
3345         do_sat_addsub_64(reg, val, a->u, a->d);
3346     }
3347     return true;
3348 }
3349 
3350 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3351 {
3352     if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
3353         return false;
3354     }
3355     if (sve_access_check(s)) {
3356         TCGv_i64 val = tcg_temp_new_i64();
3357         do_cntp(s, val, a->esz, a->pg, a->pg);
3358         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3359     }
3360     return true;
3361 }
3362 
3363 /*
3364  *** SVE Integer Compare Scalars Group
3365  */
3366 
3367 static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3368 {
3369     if (!dc_isar_feature(aa64_sve, s)) {
3370         return false;
3371     }
3372     if (!sve_access_check(s)) {
3373         return true;
3374     }
3375 
3376     TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3377     TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3378     TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3379     TCGv_i64 cmp = tcg_temp_new_i64();
3380 
3381     tcg_gen_setcond_i64(cond, cmp, rn, rm);
3382     tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3383 
3384     /* VF = !NF & !CF.  */
3385     tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3386     tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3387 
3388     /* Both NF and VF actually look at bit 31.  */
3389     tcg_gen_neg_i32(cpu_NF, cpu_NF);
3390     tcg_gen_neg_i32(cpu_VF, cpu_VF);
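         /*
          * So, for example, CTERMEQ with Rn == Rm yields NF negative (loop
          * termination condition met) and VF clear, while a mismatch yields
          * NF zero and VF equal to !CF.
          */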
3391     return true;
3392 }
3393 
3394 typedef void gen_while_fn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
3395 static bool do_WHILE(DisasContext *s, arg_while *a,
3396                      bool lt, int scale, int data, gen_while_fn *fn)
3397 {
3398     TCGv_i64 op0, op1, t0, t1, tmax;
3399     TCGv_i32 t2;
3400     TCGv_ptr ptr;
3401     unsigned vsz = vec_full_reg_size(s);
3402     unsigned desc = 0;
3403     TCGCond cond;
3404     uint64_t maxval;
3405     /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
3406     bool eq = a->eq == lt;
3407 
3408     if (!sve_access_check(s)) {
3409         return true;
3410     }
3411 
3412     op0 = read_cpu_reg(s, a->rn, 1);
3413     op1 = read_cpu_reg(s, a->rm, 1);
3414 
3415     if (!a->sf) {
3416         if (a->u) {
3417             tcg_gen_ext32u_i64(op0, op0);
3418             tcg_gen_ext32u_i64(op1, op1);
3419         } else {
3420             tcg_gen_ext32s_i64(op0, op0);
3421             tcg_gen_ext32s_i64(op1, op1);
3422         }
3423     }
3424 
3425     /* For the helper, compress the different conditions into a single
3426      * computation: the number of iterations for which the condition holds.
3427      */
3428     t0 = tcg_temp_new_i64();
3429     t1 = tcg_temp_new_i64();
3430 
3431     if (lt) {
3432         tcg_gen_sub_i64(t0, op1, op0);
3433         if (a->u) {
3434             maxval = a->sf ? UINT64_MAX : UINT32_MAX;
3435             cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
3436         } else {
3437             maxval = a->sf ? INT64_MAX : INT32_MAX;
3438             cond = eq ? TCG_COND_LE : TCG_COND_LT;
3439         }
3440     } else {
3441         tcg_gen_sub_i64(t0, op0, op1);
3442         if (a->u) {
3443             maxval = 0;
3444             cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
3445         } else {
3446             maxval = a->sf ? INT64_MIN : INT32_MIN;
3447             cond = eq ? TCG_COND_GE : TCG_COND_GT;
3448         }
3449     }
3450 
3451     tmax = tcg_constant_i64((vsz << scale) >> a->esz);
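         /*
          * Illustrative values only: with VL = 256 bits (vsz = 32 bytes),
          * scale = 0 and esz = MO_32, tmax = 32 >> 2 = 8 elements; a
          * less-than while with op0 = 5 and op1 = 9 gives t0 = 4, so the
          * first four predicate elements end up true.
          */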
3452     if (eq) {
3453         /* Equality means one more iteration.  */
3454         tcg_gen_addi_i64(t0, t0, 1);
3455 
3456         /*
3457          * For the less-than while, if op1 is maxval (the only case in
3458          * which the addition above can overflow), then we produce an all-true
3459          * predicate by setting the count to the vector length.  This is
3460          * because the pseudocode is described as an increment + compare
3461          * loop, and the maximum integer would always compare true.
3462          * Similarly, the greater-than while has the same issue with the
3463          * minimum integer due to the decrement + compare loop.
3464          */
3465         tcg_gen_movi_i64(t1, maxval);
3466         tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
3467     }
3468 
3469     /* Bound to the maximum.  */
3470     tcg_gen_umin_i64(t0, t0, tmax);
3471 
3472     /* Set the count to zero if the condition is false.  */
3473     tcg_gen_movi_i64(t1, 0);
3474     tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3475 
3476     /* Since we're bounded, pass as a 32-bit type.  */
3477     t2 = tcg_temp_new_i32();
3478     tcg_gen_extrl_i64_i32(t2, t0);
3479 
3480     desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3481     desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3482     desc = FIELD_DP32(desc, PREDDESC, DATA, data);
3483 
3484     ptr = tcg_temp_new_ptr();
3485     tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd));
3486 
3487     fn(t2, ptr, t2, tcg_constant_i32(desc));
3488 
3489     do_pred_flags(t2);
3490     return true;
3491 }
3492 
3493 TRANS_FEAT(WHILE_lt, aa64_sve, do_WHILE,
3494            a, true, 0, 0, gen_helper_sve_whilel)
3495 TRANS_FEAT(WHILE_gt, aa64_sve2, do_WHILE,
3496            a, false, 0, 0, gen_helper_sve_whileg)
3497 
3498 TRANS_FEAT(WHILE_lt_pair, aa64_sme2_or_sve2p1, do_WHILE,
3499            a, true, 1, 0, gen_helper_sve_while2l)
3500 TRANS_FEAT(WHILE_gt_pair, aa64_sme2_or_sve2p1, do_WHILE,
3501            a, false, 1, 0, gen_helper_sve_while2g)
3502 
3503 TRANS_FEAT(WHILE_lt_cnt2, aa64_sme2_or_sve2p1, do_WHILE,
3504            a, true, 1, 1, gen_helper_sve_whilecl)
3505 TRANS_FEAT(WHILE_lt_cnt4, aa64_sme2_or_sve2p1, do_WHILE,
3506            a, true, 2, 2, gen_helper_sve_whilecl)
3507 TRANS_FEAT(WHILE_gt_cnt2, aa64_sme2_or_sve2p1, do_WHILE,
3508            a, false, 1, 1, gen_helper_sve_whilecg)
3509 TRANS_FEAT(WHILE_gt_cnt4, aa64_sme2_or_sve2p1, do_WHILE,
3510            a, false, 2, 2, gen_helper_sve_whilecg)
3511 
3512 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3513 {
3514     TCGv_i64 op0, op1, diff, t1, tmax;
3515     TCGv_i32 t2;
3516     TCGv_ptr ptr;
3517     unsigned vsz = vec_full_reg_size(s);
3518     unsigned desc = 0;
3519 
3520     if (!dc_isar_feature(aa64_sve2, s)) {
3521         return false;
3522     }
3523     if (!sve_access_check(s)) {
3524         return true;
3525     }
3526 
3527     op0 = read_cpu_reg(s, a->rn, 1);
3528     op1 = read_cpu_reg(s, a->rm, 1);
3529 
3530     tmax = tcg_constant_i64(vsz >> a->esz);
3531     diff = tcg_temp_new_i64();
3532 
3533     if (a->rw) {
3534         /* WHILERW */
3535         /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3536         t1 = tcg_temp_new_i64();
3537         tcg_gen_sub_i64(diff, op0, op1);
3538         tcg_gen_sub_i64(t1, op1, op0);
3539         tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3540         /* Divide, rounding down, by ESIZE.  */
3541         tcg_gen_shri_i64(diff, diff, a->esz);
3542         /* If op1 == op0, diff == 0, and the condition is always true. */
3543         tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3544     } else {
3545         /* WHILEWR */
3546         tcg_gen_sub_i64(diff, op1, op0);
3547         /* Divide, rounding down, by ESIZE.  */
3548         tcg_gen_shri_i64(diff, diff, a->esz);
3549         /* If op0 >= op1, diff <= 0, the condition is always true. */
3550         tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3551     }
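         /*
          * An assumed example: WHILEWR with op0 = 0x1000, op1 = 0x1010 and
          * esz = MO_8 gives diff = 16, enabling 16 byte elements; operands
          * at least a full vector apart enable all tmax elements.
          */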
3552 
3553     /* Bound to the maximum.  */
3554     tcg_gen_umin_i64(diff, diff, tmax);
3555 
3556     /* Since we're bounded, pass as a 32-bit type.  */
3557     t2 = tcg_temp_new_i32();
3558     tcg_gen_extrl_i64_i32(t2, diff);
3559 
3560     desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3561     desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3562 
3563     ptr = tcg_temp_new_ptr();
3564     tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd));
3565 
3566     gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
3567     do_pred_flags(t2);
3568     return true;
3569 }
3570 
3571 static bool do_pext(DisasContext *s, arg_pext *a, int n)
3572 {
3573     TCGv_i32 t_png;
3574     TCGv_ptr t_pd;
3575     int pl;
3576 
3577     if (!sve_access_check(s)) {
3578         return true;
3579     }
3580 
3581     t_png = tcg_temp_new_i32();
3582     tcg_gen_ld16u_i32(t_png, tcg_env,
3583                       pred_full_reg_offset(s, a->rn) ^
3584                       (HOST_BIG_ENDIAN ? 6 : 0));
3585 
3586     t_pd = tcg_temp_new_ptr();
3587     pl = pred_full_reg_size(s);
3588 
3589     for (int i = 0; i < n; ++i) {
3590         int rd = (a->rd + i) % 16;
3591         int part = a->imm * n + i;
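             /*
              * part indexes the slice of the source predicate delivered to
              * each destination; e.g. (example values) n == 2 with
              * imm == 1 extracts parts 2 and 3 into the register pair.
              */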
3592         unsigned desc = 0;
3593 
3594         desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pl);
3595         desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3596         desc = FIELD_DP32(desc, PREDDESC, DATA, part);
3597 
3598         tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, rd));
3599         gen_helper_pext(t_pd, t_png, tcg_constant_i32(desc));
3600     }
3601     return true;
3602 }
3603 
3604 TRANS_FEAT(PEXT_1, aa64_sme2_or_sve2p1, do_pext, a, 1)
3605 TRANS_FEAT(PEXT_2, aa64_sme2_or_sve2p1, do_pext, a, 2)
3606 
3607 /*
3608  *** SVE Integer Wide Immediate - Unpredicated Group
3609  */
3610 
3611 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3612 {
3613     if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
3614         return false;
3615     }
3616     if (sve_access_check(s)) {
3617         unsigned vsz = vec_full_reg_size(s);
3618         int dofs = vec_full_reg_offset(s, a->rd);
3619         uint64_t imm;
3620 
3621         /* Decode the VFP immediate.  */
3622         imm = vfp_expand_imm(a->esz, a->imm);
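             /*
              * For example, imm8 == 0x70 expands to 1.0 at each element
              * size (0x3c00 for MO_16, 0x3f800000 for MO_32).
              */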
3623         tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
3624     }
3625     return true;
3626 }
3627 
3628 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3629 {
3630     if (!dc_isar_feature(aa64_sve, s)) {
3631         return false;
3632     }
3633     if (sve_access_check(s)) {
3634         unsigned vsz = vec_full_reg_size(s);
3635         int dofs = vec_full_reg_offset(s, a->rd);
3636         tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
3637     }
3638     return true;
3639 }
3640 
3641 TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)
3642 
3643 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
3644 {
3645     a->imm = -a->imm;
3646     return trans_ADD_zzi(s, a);
3647 }
3648 
3649 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
3650 {
3651     static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
3652     static const GVecGen2s op[4] = {
3653         { .fni8 = tcg_gen_vec_sub8_i64,
3654           .fniv = tcg_gen_sub_vec,
3655           .fno = gen_helper_sve_subri_b,
3656           .opt_opc = vecop_list,
3657           .vece = MO_8,
3658           .scalar_first = true },
3659         { .fni8 = tcg_gen_vec_sub16_i64,
3660           .fniv = tcg_gen_sub_vec,
3661           .fno = gen_helper_sve_subri_h,
3662           .opt_opc = vecop_list,
3663           .vece = MO_16,
3664           .scalar_first = true },
3665         { .fni4 = tcg_gen_sub_i32,
3666           .fniv = tcg_gen_sub_vec,
3667           .fno = gen_helper_sve_subri_s,
3668           .opt_opc = vecop_list,
3669           .vece = MO_32,
3670           .scalar_first = true },
3671         { .fni8 = tcg_gen_sub_i64,
3672           .fniv = tcg_gen_sub_vec,
3673           .fno = gen_helper_sve_subri_d,
3674           .opt_opc = vecop_list,
3675           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3676           .vece = MO_64,
3677           .scalar_first = true }
3678     };
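         /*
          * scalar_first makes the immediate the first operand of the
          * subtraction, so each lane computes imm - Zn, which is what the
          * reversed subtract requires.
          */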
3679 
3680     if (!dc_isar_feature(aa64_sve, s)) {
3681         return false;
3682     }
3683     if (sve_access_check(s)) {
3684         unsigned vsz = vec_full_reg_size(s);
3685         tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3686                         vec_full_reg_offset(s, a->rn),
3687                         vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
3688     }
3689     return true;
3690 }
3691 
3692 TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)
3693 
3694 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3695 {
3696     if (sve_access_check(s)) {
3697         do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
3698                           tcg_constant_i64(a->imm), u, d);
3699     }
3700     return true;
3701 }
3702 
3703 TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
3704 TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
3705 TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
3706 TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)
3707 
3708 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3709 {
3710     if (sve_access_check(s)) {
3711         unsigned vsz = vec_full_reg_size(s);
3712         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3713                             vec_full_reg_offset(s, a->rn),
3714                             tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
3715     }
3716     return true;
3717 }
3718 
3719 #define DO_ZZI(NAME, name) \
3720     static gen_helper_gvec_2i * const name##i_fns[4] = {                \
3721         gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
3722         gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
3723     };                                                                  \
3724     TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])
3725 
3726 DO_ZZI(SMAX, smax)
3727 DO_ZZI(UMAX, umax)
3728 DO_ZZI(SMIN, smin)
3729 DO_ZZI(UMIN, umin)
3730 
3731 #undef DO_ZZI
3732 
3733 static gen_helper_gvec_4 * const dot_fns[2][2] = {
3734     { gen_helper_gvec_sdot_4b, gen_helper_gvec_sdot_4h },
3735     { gen_helper_gvec_udot_4b, gen_helper_gvec_udot_4h }
3736 };
3737 TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
3738            dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
3739 
3740 /*
3741  * SVE Multiply - Indexed
3742  */
3743 
3744 TRANS_FEAT(SDOT_zzxw_4s, aa64_sve, gen_gvec_ool_arg_zzxz,
3745            gen_helper_gvec_sdot_idx_4b, a)
3746 TRANS_FEAT(SDOT_zzxw_4d, aa64_sve, gen_gvec_ool_arg_zzxz,
3747            gen_helper_gvec_sdot_idx_4h, a)
3748 TRANS_FEAT(UDOT_zzxw_4s, aa64_sve, gen_gvec_ool_arg_zzxz,
3749            gen_helper_gvec_udot_idx_4b, a)
3750 TRANS_FEAT(UDOT_zzxw_4d, aa64_sve, gen_gvec_ool_arg_zzxz,
3751            gen_helper_gvec_udot_idx_4h, a)
3752 
3753 TRANS_FEAT(SUDOT_zzxw_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
3754            gen_helper_gvec_sudot_idx_4b, a)
3755 TRANS_FEAT(USDOT_zzxw_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
3756            gen_helper_gvec_usdot_idx_4b, a)
3757 
3758 TRANS_FEAT(SDOT_zzxw_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzxz,
3759            gen_helper_gvec_sdot_idx_2h, a)
3760 TRANS_FEAT(UDOT_zzxw_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzxz,
3761            gen_helper_gvec_udot_idx_2h, a)
3762 
3763 #define DO_SVE2_RRX(NAME, FUNC) \
3764     TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzz, FUNC,         \
3765                a->rd, a->rn, a->rm, a->index)
3766 
3767 DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
3768 DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
3769 DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)
3770 
3771 DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
3772 DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
3773 DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)
3774 
3775 DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
3776 DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
3777 DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)
3778 
3779 #undef DO_SVE2_RRX
3780 
3781 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
3782     TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzz, FUNC,         \
3783                a->rd, a->rn, a->rm, (a->index << 1) | TOP)
3784 
3785 DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
3786 DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
3787 DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
3788 DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)
3789 
3790 DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
3791 DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
3792 DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
3793 DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)
3794 
3795 DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
3796 DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
3797 DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
3798 DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)
3799 
3800 #undef DO_SVE2_RRX_TB
3801 
3802 #define DO_SVE2_RRXR(NAME, FUNC) \
3803     TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)
3804 
3805 DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
3806 DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
3807 DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)
3808 
3809 DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
3810 DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
3811 DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)
3812 
3813 DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
3814 DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
3815 DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)
3816 
3817 DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
3818 DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
3819 DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)
3820 
3821 #undef DO_SVE2_RRXR
3822 
3823 #define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
3824     TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC,        \
3825                a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)
3826 
3827 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
3828 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
3829 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
3830 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)
3831 
3832 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
3833 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
3834 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
3835 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)
3836 
3837 DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
3838 DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
3839 DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
3840 DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)
3841 
3842 DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
3843 DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
3844 DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
3845 DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)
3846 
3847 DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
3848 DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
3849 DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
3850 DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)
3851 
3852 DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
3853 DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
3854 DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
3855 DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)
3856 
3857 #undef DO_SVE2_RRXR_TB
3858 
3859 #define DO_SVE2_RRXR_ROT(NAME, FUNC) \
3860     TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC,           \
3861                a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)
3862 
3863 DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
3864 DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)
3865 
3866 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
3867 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)
3868 
3869 DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
3870 DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
3871 
3872 #undef DO_SVE2_RRXR_ROT
3873 
3874 /*
3875  *** SVE Floating Point Multiply-Add Indexed Group
3876  */
3877 
3878 static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = {
3879     NULL,                       gen_helper_gvec_fmla_idx_h,
3880     gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d
3881 };
3882 TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
3883            fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index,
3884            a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
3885 
3886 static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = {
3887     { NULL, NULL },
3888     { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h },
3889     { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s },
3890     { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d },
3891 };
3892 TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
3893            fmls_idx_fns[a->esz][s->fpcr_ah],
3894            a->rd, a->rn, a->rm, a->ra, a->index,
3895            a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
3896 
3897 /*
3898  *** SVE Floating Point Multiply Indexed Group
3899  */
3900 
3901 static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
3902     NULL,                       gen_helper_gvec_fmul_idx_h,
3903     gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
3904 };
3905 TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
3906            fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
3907            a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
3908 
3909 /*
3910  *** SVE Floating Point Fast Reduction Group
3911  */
3912 
3913 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3914                                   TCGv_ptr, TCGv_i32);
3915 
3916 static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
3917                       gen_helper_fp_reduce *fn)
3918 {
3919     unsigned vsz, p2vsz;
3920     TCGv_i32 t_desc;
3921     TCGv_ptr t_zn, t_pg, status;
3922     TCGv_i64 temp;
3923 
3924     if (fn == NULL) {
3925         return false;
3926     }
3927     if (!sve_access_check(s)) {
3928         return true;
3929     }
3930 
3931     vsz = vec_full_reg_size(s);
3932     p2vsz = pow2ceil(vsz);
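         /*
          * The helpers reduce over a power-of-2 element count, padding the
          * tail with an identity value; e.g. (assumed sizes) VL = 384 bits
          * gives vsz = 48 and p2vsz = 64.
          */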
3933     t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
3934     temp = tcg_temp_new_i64();
3935     t_zn = tcg_temp_new_ptr();
3936     t_pg = tcg_temp_new_ptr();
3937 
3938     tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
3939     tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
3940     status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
3941 
3942     fn(temp, t_zn, t_pg, status, t_desc);
3943 
3944     write_fp_dreg(s, a->rd, temp);
3945     return true;
3946 }
3947 
3948 #define DO_VPZ(NAME, name) \
3949     static gen_helper_fp_reduce * const name##_fns[4] = {                \
3950         NULL,                      gen_helper_sve_##name##_h,            \
3951         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
3952     };                                                                   \
3953     TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])
3954 
3955 #define DO_VPZ_AH(NAME, name)                                            \
3956     static gen_helper_fp_reduce * const name##_fns[4] = {                \
3957         NULL,                      gen_helper_sve_##name##_h,            \
3958         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
3959     };                                                                   \
3960     static gen_helper_fp_reduce * const name##_ah_fns[4] = {             \
3961         NULL,                      gen_helper_sve_ah_##name##_h,         \
3962         gen_helper_sve_ah_##name##_s, gen_helper_sve_ah_##name##_d,      \
3963     };                                                                   \
3964     TRANS_FEAT(NAME, aa64_sve, do_reduce, a,                             \
3965                s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz])
3966 
3967 DO_VPZ(FADDV, faddv)
3968 DO_VPZ(FMINNMV, fminnmv)
3969 DO_VPZ(FMAXNMV, fmaxnmv)
3970 DO_VPZ_AH(FMINV, fminv)
3971 DO_VPZ_AH(FMAXV, fmaxv)
3972 
3973 #undef DO_VPZ
3974 
3975 static gen_helper_gvec_3_ptr * const faddqv_fns[4] = {
3976     NULL,                       gen_helper_sve2p1_faddqv_h,
3977     gen_helper_sve2p1_faddqv_s, gen_helper_sve2p1_faddqv_d,
3978 };
3979 TRANS_FEAT(FADDQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz,
3980            faddqv_fns[a->esz], a, 0,
3981            a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
3982 
3983 static gen_helper_gvec_3_ptr * const fmaxnmqv_fns[4] = {
3984     NULL,                         gen_helper_sve2p1_fmaxnmqv_h,
3985     gen_helper_sve2p1_fmaxnmqv_s, gen_helper_sve2p1_fmaxnmqv_d,
3986 };
3987 TRANS_FEAT(FMAXNMQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz,
3988            fmaxnmqv_fns[a->esz], a, 0,
3989            a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
3990 
3991 static gen_helper_gvec_3_ptr * const fminnmqv_fns[4] = {
3992     NULL,                         gen_helper_sve2p1_fminnmqv_h,
3993     gen_helper_sve2p1_fminnmqv_s, gen_helper_sve2p1_fminnmqv_d,
3994 };
3995 TRANS_FEAT(FMINNMQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz,
3996            fminnmqv_fns[a->esz], a, 0,
3997            a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
3998 
3999 static gen_helper_gvec_3_ptr * const fmaxqv_fns[4] = {
4000     NULL,                       gen_helper_sve2p1_fmaxqv_h,
4001     gen_helper_sve2p1_fmaxqv_s, gen_helper_sve2p1_fmaxqv_d,
4002 };
4003 static gen_helper_gvec_3_ptr * const fmaxqv_ah_fns[4] = {
4004     NULL,                          gen_helper_sve2p1_ah_fmaxqv_h,
4005     gen_helper_sve2p1_ah_fmaxqv_s, gen_helper_sve2p1_ah_fmaxqv_d,
4006 };
4007 TRANS_FEAT(FMAXQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz,
4008            (s->fpcr_ah ? fmaxqv_ah_fns : fmaxqv_fns)[a->esz], a, 0,
4009            a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
4010 
4011 static gen_helper_gvec_3_ptr * const fminqv_fns[4] = {
4012     NULL,                       gen_helper_sve2p1_fminqv_h,
4013     gen_helper_sve2p1_fminqv_s, gen_helper_sve2p1_fminqv_d,
4014 };
4015 static gen_helper_gvec_3_ptr * const fminqv_ah_fns[4] = {
4016     NULL,                          gen_helper_sve2p1_ah_fminqv_h,
4017     gen_helper_sve2p1_ah_fminqv_s, gen_helper_sve2p1_ah_fminqv_d,
4018 };
4019 TRANS_FEAT(FMINQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz,
4020            (s->fpcr_ah ? fminqv_ah_fns : fminqv_fns)[a->esz], a, 0,
4021            a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
4022 
4023 /*
4024  *** SVE Floating Point Unary Operations - Unpredicated Group
4025  */
4026 
4027 static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
4028     NULL,                     gen_helper_gvec_frecpe_h,
4029     gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
4030 };
4031 static gen_helper_gvec_2_ptr * const frecpe_rpres_fns[] = {
4032     NULL,                           gen_helper_gvec_frecpe_h,
4033     gen_helper_gvec_frecpe_rpres_s, gen_helper_gvec_frecpe_d,
4034 };
4035 TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz,
4036            s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
4037            frecpe_rpres_fns[a->esz] : frecpe_fns[a->esz], a, 0)
4038 
4039 static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
4040     NULL,                      gen_helper_gvec_frsqrte_h,
4041     gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
4042 };
4043 static gen_helper_gvec_2_ptr * const frsqrte_rpres_fns[] = {
4044     NULL,                            gen_helper_gvec_frsqrte_h,
4045     gen_helper_gvec_frsqrte_rpres_s, gen_helper_gvec_frsqrte_d,
4046 };
4047 TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz,
4048            s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
4049            frsqrte_rpres_fns[a->esz] : frsqrte_fns[a->esz], a, 0)
4050 
4051 /*
4052  *** SVE Floating Point Compare with Zero Group
4053  */
4054 
4055 static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
4056                       gen_helper_gvec_3_ptr *fn)
4057 {
4058     if (fn == NULL) {
4059         return false;
4060     }
4061     if (sve_access_check(s)) {
4062         unsigned vsz = vec_full_reg_size(s);
4063         TCGv_ptr status =
4064             fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
4065 
4066         tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
4067                            vec_full_reg_offset(s, a->rn),
4068                            pred_full_reg_offset(s, a->pg),
4069                            status, vsz, vsz, 0, fn);
4070     }
4071     return true;
4072 }
4073 
4074 #define DO_PPZ(NAME, name) \
4075     static gen_helper_gvec_3_ptr * const name##_fns[] = {         \
4076         NULL,                      gen_helper_sve_##name##_h,     \
4077         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,     \
4078     };                                                            \
4079     TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz])
4080 
4081 DO_PPZ(FCMGE_ppz0, fcmge0)
4082 DO_PPZ(FCMGT_ppz0, fcmgt0)
4083 DO_PPZ(FCMLE_ppz0, fcmle0)
4084 DO_PPZ(FCMLT_ppz0, fcmlt0)
4085 DO_PPZ(FCMEQ_ppz0, fcmeq0)
4086 DO_PPZ(FCMNE_ppz0, fcmne0)
4087 
4088 #undef DO_PPZ
4089 
4090 /*
4091  *** SVE floating-point trig multiply-add coefficient
4092  */
4093 
4094 static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
4095     NULL,                   gen_helper_sve_ftmad_h,
4096     gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
4097 };
4098 TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
4099                         ftmad_fns[a->esz], a->rd, a->rn, a->rm,
4100                         a->imm | (s->fpcr_ah << 3),
4101                         a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
4102 
4103 /*
4104  *** SVE Floating Point Accumulating Reduction Group
4105  */
4106 
4107 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
4108 {
4109     typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
4110                           TCGv_ptr, TCGv_ptr, TCGv_i32);
4111     static fadda_fn * const fns[3] = {
4112         gen_helper_sve_fadda_h,
4113         gen_helper_sve_fadda_s,
4114         gen_helper_sve_fadda_d,
4115     };
4116     unsigned vsz = vec_full_reg_size(s);
4117     TCGv_ptr t_rm, t_pg, t_fpst;
4118     TCGv_i64 t_val;
4119     TCGv_i32 t_desc;
4120 
4121     if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
4122         return false;
4123     }
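         /* FADDA is a non-streaming-only instruction (absent FEAT_SME_FA64). */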
4124     s->is_nonstreaming = true;
4125     if (!sve_access_check(s)) {
4126         return true;
4127     }
4128 
4129     t_val = load_esz(tcg_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
4130     t_rm = tcg_temp_new_ptr();
4131     t_pg = tcg_temp_new_ptr();
4132     tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm));
4133     tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
4134     t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
4135     t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
4136 
4137     fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
4138 
4139     write_fp_dreg(s, a->rd, t_val);
4140     return true;
4141 }
4142 
4143 /*
4144  *** SVE Floating Point Arithmetic - Unpredicated Group
4145  */
4146 
4147 #define DO_FP3(NAME, name) \
4148     static gen_helper_gvec_3_ptr * const name##_fns[4] = {          \
4149         NULL, gen_helper_gvec_##name##_h,                           \
4150         gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
4151     };                                                              \
4152     TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
4153 
4154 #define DO_FP3_AH(NAME, name) \
4155     static gen_helper_gvec_3_ptr * const name##_fns[4] = {          \
4156         NULL, gen_helper_gvec_##name##_h,                           \
4157         gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
4158     };                                                              \
4159     static gen_helper_gvec_3_ptr * const name##_ah_fns[4] = {       \
4160         NULL, gen_helper_gvec_ah_##name##_h,                        \
4161         gen_helper_gvec_ah_##name##_s, gen_helper_gvec_ah_##name##_d    \
4162     };                                                              \
4163     TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz,            \
4164                s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], a, 0)
4165 
4166 DO_FP3(FADD_zzz, fadd)
4167 DO_FP3(FSUB_zzz, fsub)
4168 DO_FP3(FMUL_zzz, fmul)
4169 DO_FP3_AH(FRECPS, recps)
4170 DO_FP3_AH(FRSQRTS, rsqrts)
4171 
4172 #undef DO_FP3
4173 
4174 static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
4175     NULL,                     gen_helper_gvec_ftsmul_h,
4176     gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
4177 };
4178 TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
4179                         ftsmul_fns[a->esz], a, 0)
4180 
4181 /*
4182  *** SVE Floating Point Arithmetic - Predicated Group
4183  */
4184 
4185 #define DO_ZPZZ_FP(NAME, FEAT, name) \
4186     static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
4187         NULL,                  gen_helper_##name##_h,           \
4188         gen_helper_##name##_s, gen_helper_##name##_d            \
4189     };                                                          \
4190     TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
4191 
4192 #define DO_ZPZZ_AH_FP(NAME, FEAT, name, ah_name)                        \
4193     static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = {         \
4194         NULL,                  gen_helper_##name##_h,                   \
4195         gen_helper_##name##_s, gen_helper_##name##_d                    \
4196     };                                                                  \
4197     static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = {      \
4198         NULL,                  gen_helper_##ah_name##_h,                \
4199         gen_helper_##ah_name##_s, gen_helper_##ah_name##_d              \
4200     };                                                                  \
4201     TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz,                      \
4202                s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] :                \
4203                name##_zpzz_fns[a->esz], a)
4204 
4205 DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
4206 DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
4207 DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
4208 DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin)
4209 DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax)
4210 DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
4211 DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
4212 DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd)
4213 DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
4214 DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
4215 DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)
4216 
4217 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
4218                                       TCGv_i64, TCGv_ptr, TCGv_i32);
4219 
4220 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
4221                          TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
4222 {
4223     unsigned vsz = vec_full_reg_size(s);
4224     TCGv_ptr t_zd, t_zn, t_pg, status;
4225     TCGv_i32 desc;
4226 
4227     t_zd = tcg_temp_new_ptr();
4228     t_zn = tcg_temp_new_ptr();
4229     t_pg = tcg_temp_new_ptr();
4230     tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd));
4231     tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn));
4232     tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
4233 
4234     status = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64);
4235     desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
4236     fn(t_zd, t_zn, t_pg, scalar, status, desc);
4237 }
4238 
4239 static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
4240                       gen_helper_sve_fp2scalar *fn)
4241 {
4242     if (fn == NULL) {
4243         return false;
4244     }
4245     if (sve_access_check(s)) {
4246         do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4247                      tcg_constant_i64(imm), fn);
4248     }
4249     return true;
4250 }
4251 
4252 #define DO_FP_IMM(NAME, name, const0, const1)                           \
4253     static gen_helper_sve_fp2scalar * const name##_fns[4] = {           \
4254         NULL, gen_helper_sve_##name##_h,                                \
4255         gen_helper_sve_##name##_s,                                      \
4256         gen_helper_sve_##name##_d                                       \
4257     };                                                                  \
4258     static const uint64_t name##_const[4][2] = {                        \
4259         { -1, -1 },                                                     \
4260         { float16_##const0, float16_##const1 },                         \
4261         { float32_##const0, float32_##const1 },                         \
4262         { float64_##const0, float64_##const1 },                         \
4263     };                                                                  \
4264     TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a,                     \
4265                name##_const[a->esz][a->imm], name##_fns[a->esz])
4266 
4267 #define DO_FP_AH_IMM(NAME, name, const0, const1)                        \
4268     static gen_helper_sve_fp2scalar * const name##_fns[4] = {           \
4269         NULL, gen_helper_sve_##name##_h,                                \
4270         gen_helper_sve_##name##_s,                                      \
4271         gen_helper_sve_##name##_d                                       \
4272     };                                                                  \
4273     static gen_helper_sve_fp2scalar * const name##_ah_fns[4] = {        \
4274         NULL, gen_helper_sve_ah_##name##_h,                             \
4275         gen_helper_sve_ah_##name##_s,                                   \
4276         gen_helper_sve_ah_##name##_d                                    \
4277     };                                                                  \
4278     static const uint64_t name##_const[4][2] = {                        \
4279         { -1, -1 },                                                     \
4280         { float16_##const0, float16_##const1 },                         \
4281         { float32_##const0, float32_##const1 },                         \
4282         { float64_##const0, float64_##const1 },                         \
4283     };                                                                  \
4284     TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a,                     \
4285                name##_const[a->esz][a->imm],                            \
4286                s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz])
4287 
4288 DO_FP_IMM(FADD, fadds, half, one)
4289 DO_FP_IMM(FSUB, fsubs, half, one)
4290 DO_FP_IMM(FMUL, fmuls, half, two)
4291 DO_FP_IMM(FSUBR, fsubrs, half, one)
4292 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
4293 DO_FP_IMM(FMINNM, fminnms, zero, one)
4294 DO_FP_AH_IMM(FMAX, fmaxs, zero, one)
4295 DO_FP_AH_IMM(FMIN, fmins, zero, one)
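     /*
      * In all of these forms the one-bit immediate simply selects between
      * the two constants named above, e.g. #0.5 vs #1.0 for FADD and
      * #0.5 vs #2.0 for FMUL.
      */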
4296 
4297 #undef DO_FP_IMM
4298 
4299 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
4300                       gen_helper_gvec_4_ptr *fn)
4301 {
4302     if (fn == NULL) {
4303         return false;
4304     }
4305     if (sve_access_check(s)) {
4306         unsigned vsz = vec_full_reg_size(s);
4307         TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
4308         tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
4309                            vec_full_reg_offset(s, a->rn),
4310                            vec_full_reg_offset(s, a->rm),
4311                            pred_full_reg_offset(s, a->pg),
4312                            status, vsz, vsz, 0, fn);
4313     }
4314     return true;
4315 }
4316 
4317 #define DO_FPCMP(NAME, name) \
4318     static gen_helper_gvec_4_ptr * const name##_fns[4] = {            \
4319         NULL, gen_helper_sve_##name##_h,                              \
4320         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
4321     };                                                                \
4322     TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz])
4323 
4324 DO_FPCMP(FCMGE, fcmge)
4325 DO_FPCMP(FCMGT, fcmgt)
4326 DO_FPCMP(FCMEQ, fcmeq)
4327 DO_FPCMP(FCMNE, fcmne)
4328 DO_FPCMP(FCMUO, fcmuo)
4329 DO_FPCMP(FACGE, facge)
4330 DO_FPCMP(FACGT, facgt)
4331 
4332 #undef DO_FPCMP
4333 
4334 static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
4335     NULL,                   gen_helper_sve_fcadd_h,
4336     gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
4337 };
4338 TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
4339            a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
4340            a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
4341 
4342 #define DO_FMLA(NAME, name, ah_name)                                    \
4343     static gen_helper_gvec_5_ptr * const name##_fns[4] = {              \
4344         NULL, gen_helper_sve_##name##_h,                                \
4345         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d            \
4346     };                                                                  \
4347     static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = {           \
4348         NULL, gen_helper_sve_##ah_name##_h,                             \
4349         gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d      \
4350     };                                                                  \
4351     TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp,                     \
4352                s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \
4353                a->rd, a->rn, a->rm, a->ra, a->pg, 0,                    \
4354                a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
4355 
4356 /* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything.  */
4357 DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz)
4358 DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz)
4359 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz)
4360 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz)
4361 
4362 #undef DO_FMLA
4363 
4364 static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
4365     NULL,                         gen_helper_sve_fcmla_zpzzz_h,
4366     gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
4367 };
4368 TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
4369            a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2),
4370            a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
4371 
4372 static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
4373     NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL
4374 };
4375 TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
4376            a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot,
4377            a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
4378 
4379 /*
4380  *** SVE Floating Point Unary Operations Predicated Group
4381  */
4382 
4383 TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4384            gen_helper_sve_fcvt_sh, a, 0, FPST_A64)
4385 TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
4386            gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16)
4387 
4388 TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
4389            gen_helper_sve_bfcvt, a, 0,
4390            s->fpcr_ah ? FPST_AH : FPST_A64)
4391 
4392 TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4393            gen_helper_sve_fcvt_dh, a, 0, FPST_A64)
4394 TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
4395            gen_helper_sve_fcvt_hd, a, 0, FPST_A64_F16)
4396 TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4397            gen_helper_sve_fcvt_ds, a, 0, FPST_A64)
4398 TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4399            gen_helper_sve_fcvt_sd, a, 0, FPST_A64)
4400 
4401 TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4402            gen_helper_sve_fcvtzs_hh, a, 0, FPST_A64_F16)
4403 TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4404            gen_helper_sve_fcvtzu_hh, a, 0, FPST_A64_F16)
4405 TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
4406            gen_helper_sve_fcvtzs_hs, a, 0, FPST_A64_F16)
4407 TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
4408            gen_helper_sve_fcvtzu_hs, a, 0, FPST_A64_F16)
4409 TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
4410            gen_helper_sve_fcvtzs_hd, a, 0, FPST_A64_F16)
4411 TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
4412            gen_helper_sve_fcvtzu_hd, a, 0, FPST_A64_F16)
4413 
4414 TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4415            gen_helper_sve_fcvtzs_ss, a, 0, FPST_A64)
4416 TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4417            gen_helper_sve_fcvtzu_ss, a, 0, FPST_A64)
4418 TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4419            gen_helper_sve_fcvtzs_sd, a, 0, FPST_A64)
4420 TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4421            gen_helper_sve_fcvtzu_sd, a, 0, FPST_A64)
4422 TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4423            gen_helper_sve_fcvtzs_ds, a, 0, FPST_A64)
4424 TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4425            gen_helper_sve_fcvtzu_ds, a, 0, FPST_A64)
4426 
4427 TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4428            gen_helper_sve_fcvtzs_dd, a, 0, FPST_A64)
4429 TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4430            gen_helper_sve_fcvtzu_dd, a, 0, FPST_A64)
4431 
4432 static gen_helper_gvec_3_ptr * const frint_fns[] = {
4433     NULL,
4434     gen_helper_sve_frint_h,
4435     gen_helper_sve_frint_s,
4436     gen_helper_sve_frint_d
4437 };
4438 TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
4439            a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
4440 
4441 static gen_helper_gvec_3_ptr * const frintx_fns[] = {
4442     NULL,
4443     gen_helper_sve_frintx_h,
4444     gen_helper_sve_frintx_s,
4445     gen_helper_sve_frintx_d
4446 };
4447 TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
4448            a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
4449 
4450 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
4451                           ARMFPRounding mode, gen_helper_gvec_3_ptr *fn)
4452 {
4453     unsigned vsz;
4454     TCGv_i32 tmode;
4455     TCGv_ptr status;
4456 
4457     if (fn == NULL) {
4458         return false;
4459     }
4460     if (!sve_access_check(s)) {
4461         return true;
4462     }
4463 
4464     vsz = vec_full_reg_size(s);
4465     status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
4466     tmode = gen_set_rmode(mode, status);
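         /*
          * gen_set_rmode installs the requested rounding mode in the status
          * block and returns the previous mode, which is reinstated below
          * once the vector operation has been expanded.
          */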
4467 
4468     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4469                        vec_full_reg_offset(s, a->rn),
4470                        pred_full_reg_offset(s, a->pg),
4471                        status, vsz, vsz, 0, fn);
4472 
4473     gen_restore_rmode(tmode, status);
4474     return true;
4475 }
4476 
4477 TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
4478            FPROUNDING_TIEEVEN, frint_fns[a->esz])
4479 TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
4480            FPROUNDING_POSINF, frint_fns[a->esz])
4481 TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
4482            FPROUNDING_NEGINF, frint_fns[a->esz])
4483 TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
4484            FPROUNDING_ZERO, frint_fns[a->esz])
4485 TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
4486            FPROUNDING_TIEAWAY, frint_fns[a->esz])
4487 
4488 static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
4489     NULL,                    gen_helper_sve_frecpx_h,
4490     gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
4491 };
4492 TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
4493            a, 0, select_ah_fpst(s, a->esz))
4494 
4495 static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
4496     NULL,                   gen_helper_sve_fsqrt_h,
4497     gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
4498 };
4499 TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
4500            a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
4501 
4502 TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4503            gen_helper_sve_scvt_hh, a, 0, FPST_A64_F16)
4504 TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4505            gen_helper_sve_scvt_sh, a, 0, FPST_A64_F16)
4506 TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4507            gen_helper_sve_scvt_dh, a, 0, FPST_A64_F16)
4508 
4509 TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4510            gen_helper_sve_scvt_ss, a, 0, FPST_A64)
4511 TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4512            gen_helper_sve_scvt_ds, a, 0, FPST_A64)
4513 
4514 TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4515            gen_helper_sve_scvt_sd, a, 0, FPST_A64)
4516 TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4517            gen_helper_sve_scvt_dd, a, 0, FPST_A64)
4518 
4519 TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4520            gen_helper_sve_ucvt_hh, a, 0, FPST_A64_F16)
4521 TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4522            gen_helper_sve_ucvt_sh, a, 0, FPST_A64_F16)
4523 TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4524            gen_helper_sve_ucvt_dh, a, 0, FPST_A64_F16)
4525 
4526 TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4527            gen_helper_sve_ucvt_ss, a, 0, FPST_A64)
4528 TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4529            gen_helper_sve_ucvt_ds, a, 0, FPST_A64)
4530 TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4531            gen_helper_sve_ucvt_sd, a, 0, FPST_A64)
4532 
4533 TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4534            gen_helper_sve_ucvt_dd, a, 0, FPST_A64)
4535 
4536 /*
4537  *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4538  */
4539 
4540 /* Subroutine loading a vector register at VOFS of LEN bytes.
4541  * The load should begin at the address Rn + IMM.
4542  */
4543 
4544 void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
4545                  int len, int rn, int imm, MemOp align)
4546 {
4547     int len_align = QEMU_ALIGN_DOWN(len, 16);
4548     int len_remain = len % 16;
4549     int nparts = len / 16 + ctpop8(len_remain);
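         /*
          * A worked example with assumed sizes: a predicate load for
          * VL = 384 bits has len = 6, so len_align = 0, len_remain = 6 and
          * nparts = 0 + ctpop8(6) = 2 (one 4-byte and one 2-byte load).
          */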
4550     int midx = get_mem_index(s);
4551     TCGv_i64 dirty_addr, clean_addr, t0, t1;
4552     TCGv_i128 t16;
4553 
4554     dirty_addr = tcg_temp_new_i64();
4555     tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4556     clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4557 
4558     /*
4559      * Note that unpredicated load/store of vector/predicate registers
4560      * are defined as a stream of bytes, which equates to little-endian
4561      * operations on larger quantities.
4562      * Attempt to keep code expansion to a minimum by limiting the
4563      * amount of unrolling done.
4564      */
4565     if (nparts <= 4) {
4566         int i;
4567 
4568         t0 = tcg_temp_new_i64();
4569         t1 = tcg_temp_new_i64();
4570         t16 = tcg_temp_new_i128();
4571 
4572         for (i = 0; i < len_align; i += 16) {
4573             tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
4574                                  MO_LE | MO_128 | MO_ATOM_NONE | align);
4575             tcg_gen_extr_i128_i64(t0, t1, t16);
4576             tcg_gen_st_i64(t0, base, vofs + i);
4577             tcg_gen_st_i64(t1, base, vofs + i + 8);
4578             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4579         }
4580         if (len_align) {
4581             align = MO_UNALN;
4582         }
4583     } else {
4584         TCGLabel *loop = gen_new_label();
4585         TCGv_ptr tp, i = tcg_temp_new_ptr();
4586 
4587         tcg_gen_movi_ptr(i, 0);
4588         gen_set_label(loop);
4589 
4590         t16 = tcg_temp_new_i128();
4591         tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
4592                              MO_LE | MO_128 | MO_ATOM_NONE | align);
4593         tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4594 
4595         tp = tcg_temp_new_ptr();
4596         tcg_gen_add_ptr(tp, base, i);
4597         tcg_gen_addi_ptr(i, i, 16);
4598 
4599         t0 = tcg_temp_new_i64();
4600         t1 = tcg_temp_new_i64();
4601         tcg_gen_extr_i128_i64(t0, t1, t16);
4602 
4603         tcg_gen_st_i64(t0, tp, vofs);
4604         tcg_gen_st_i64(t1, tp, vofs + 8);
4605 
4606         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4607         align = MO_UNALN;
4608     }
4609 
4610     /*
4611      * Predicate register loads can be any multiple of 2.
4612      * Note that we still store the entire 64-bit unit into tcg_env.
4613      */
4614     if (len_remain >= 8) {
4615         t0 = tcg_temp_new_i64();
4616         tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4617                             MO_LEUQ | MO_ATOM_NONE | align);
4618         align = MO_UNALN;
4619         tcg_gen_st_i64(t0, base, vofs + len_align);
4620         len_remain -= 8;
4621         len_align += 8;
4622         if (len_remain) {
4623             tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4624         }
4625     }
4626     if (len_remain) {
4627         t0 = tcg_temp_new_i64();
4628         switch (len_remain) {
4629         case 2:
4630         case 4:
4631         case 8:
4632             tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4633                                 MO_LE | ctz32(len_remain)
4634                                 | MO_ATOM_NONE | align);
4635             break;
4636 
4637         case 6:
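            /* Load a 4-byte + 2-byte pair and merge into the low 48 bits. */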
4638             t1 = tcg_temp_new_i64();
4639             tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4640                                 MO_LEUL | MO_ATOM_NONE | align);
4641             tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4642             tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
4643             tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4644             break;
4645 
4646         default:
4647             g_assert_not_reached();
4648         }
4649         tcg_gen_st_i64(t0, base, vofs + len_align);
4650     }
4651 }
4652 
4653 /* Similarly for stores.  */
4654 void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
4655                  int len, int rn, int imm, MemOp align)
4656 {
4657     int len_align = QEMU_ALIGN_DOWN(len, 16);
4658     int len_remain = len % 16;
4659     int nparts = len / 16 + ctpop8(len_remain);
4660     int midx = get_mem_index(s);
4661     TCGv_i64 dirty_addr, clean_addr, t0, t1;
4662     TCGv_i128 t16;
4663 
4664     dirty_addr = tcg_temp_new_i64();
4665     tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4666     clean_addr = gen_mte_checkN(s, dirty_addr, true, rn != 31, len, MO_8);
4667 
4668     /* Note that unpredicated load/store of vector/predicate registers
4669      * are defined as a stream of bytes, which equates to little-endian
4670      * operations on larger quantities.  There is no nice way to force
4671      * a little-endian store for aarch64_be-linux-user out of line.
4672      *
4673      * Attempt to keep code expansion to a minimum by limiting the
4674      * amount of unrolling done.
4675      */
4676     if (nparts <= 4) {
4677         int i;
4678 
4679         t0 = tcg_temp_new_i64();
4680         t1 = tcg_temp_new_i64();
4681         t16 = tcg_temp_new_i128();
4682         for (i = 0; i < len_align; i += 16) {
4683             tcg_gen_ld_i64(t0, base, vofs + i);
4684             tcg_gen_ld_i64(t1, base, vofs + i + 8);
4685             tcg_gen_concat_i64_i128(t16, t0, t1);
4686             tcg_gen_qemu_st_i128(t16, clean_addr, midx,
4687                                  MO_LE | MO_128 | MO_ATOM_NONE | align);
4688             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4689         }
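        /* As for loads: the first aligned store performed the check. */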
4690         if (len_align) {
4691             align = MO_UNALN;
4692         }
4693     } else {
4694         TCGLabel *loop = gen_new_label();
4695         TCGv_ptr tp, i = tcg_temp_new_ptr();
4696 
4697         tcg_gen_movi_ptr(i, 0);
4698         gen_set_label(loop);
4699 
4700         t0 = tcg_temp_new_i64();
4701         t1 = tcg_temp_new_i64();
4702         tp = tcg_temp_new_ptr();
4703         tcg_gen_add_ptr(tp, base, i);
4704         tcg_gen_ld_i64(t0, tp, vofs);
4705         tcg_gen_ld_i64(t1, tp, vofs + 8);
4706         tcg_gen_addi_ptr(i, i, 16);
4707 
4708         t16 = tcg_temp_new_i128();
4709         tcg_gen_concat_i64_i128(t16, t0, t1);
4710 
4711         tcg_gen_qemu_st_i128(t16, clean_addr, midx,
4712                              MO_LE | MO_128 | MO_ATOM_NONE | align);
4713         tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4714 
4715         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4716         align = MO_UNALN;
4717     }
4718 
4719     /* Predicate register stores can be any multiple of 2.  */
4720     if (len_remain >= 8) {
4721         t0 = tcg_temp_new_i64();
4722         tcg_gen_ld_i64(t0, base, vofs + len_align);
4723         tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4724                             MO_LEUQ | MO_ATOM_NONE | align);
4725         align = MO_UNALN;
4726         len_remain -= 8;
4727         len_align += 8;
4728         if (len_remain) {
4729             tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4730         }
4731     }
4732     if (len_remain) {
4733         t0 = tcg_temp_new_i64();
4734         tcg_gen_ld_i64(t0, base, vofs + len_align);
4735 
4736         switch (len_remain) {
4737         case 2:
4738         case 4:
4739         case 8:
4740             tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4741                                 MO_LE | ctz32(len_remain)
4742                                 | MO_ATOM_NONE | align);
4743             break;
4744 
4745         case 6:
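            /* Store the low 48 bits as a 4-byte + 2-byte pair. */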
4746             tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4747                                 MO_LEUL | MO_ATOM_NONE | align);
4748             tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4749             tcg_gen_shri_i64(t0, t0, 32);
4750             tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
4751             break;
4752 
4753         default:
4754             g_assert_not_reached();
4755         }
4756     }
4757 }
4758 
4759 static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4760 {
4761     if (!dc_isar_feature(aa64_sve, s)) {
4762         return false;
4763     }
4764     if (sve_access_check(s)) {
4765         int size = vec_full_reg_size(s);
4766         int off = vec_full_reg_offset(s, a->rd);
4767         gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size,
4768                     s->align_mem ? MO_ALIGN_16 : MO_UNALN);
4769     }
4770     return true;
4771 }
4772 
4773 static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4774 {
4775     if (!dc_isar_feature(aa64_sve, s)) {
4776         return false;
4777     }
4778     if (sve_access_check(s)) {
4779         int size = pred_full_reg_size(s);
4780         int off = pred_full_reg_offset(s, a->rd);
4781         gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size,
4782                     s->align_mem ? MO_ALIGN_2 : MO_UNALN);
4783     }
4784     return true;
4785 }
4786 
4787 static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4788 {
4789     if (!dc_isar_feature(aa64_sve, s)) {
4790         return false;
4791     }
4792     if (sve_access_check(s)) {
4793         int size = vec_full_reg_size(s);
4794         int off = vec_full_reg_offset(s, a->rd);
4795         gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size,
4796                     s->align_mem ? MO_ALIGN_16 : MO_UNALN);
4797     }
4798     return true;
4799 }
4800 
4801 static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4802 {
4803     if (!dc_isar_feature(aa64_sve, s)) {
4804         return false;
4805     }
4806     if (sve_access_check(s)) {
4807         int size = pred_full_reg_size(s);
4808         int off = pred_full_reg_offset(s, a->rd);
4809         gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size,
4810                     s->align_mem ? MO_ALIGN_2 : MO_UNALN);
4811     }
4812     return true;
4813 }
4814 
4815 /*
4816  *** SVE Memory - Contiguous Load Group
4817  */
4818 
4819 /* The memory mode of the dtype.  */
4820 static const MemOp dtype_mop[19] = {
4821     MO_UB, MO_UB, MO_UB, MO_UB,
4822     MO_SL, MO_UW, MO_UW, MO_UW,
4823     MO_SW, MO_SW, MO_UL, MO_UL,
4824     MO_SB, MO_SB, MO_SB, MO_UQ,
4825     /* Artificial values used by decode */
4826     MO_UL, MO_UQ, MO_128,
4827 };
4828 
4829 #define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
4830 
4831 /* The vector element size of dtype.  */
4832 static const uint8_t dtype_esz[19] = {
4833     0, 1, 2, 3,
4834     3, 1, 2, 3,
4835     3, 2, 2, 3,
4836     3, 2, 1, 3,
4837     /* Artificial values used by decode */
4838     4, 4, 4,
4839 };
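/*
 * For example, dtype 4 is LD1SW: a sign-extended 32-bit load
 * (dtype_mop[4] == MO_SL) into 64-bit elements (dtype_esz[4] == 3).
 */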
4840 
4841 uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs,
4842                           uint32_t msz, bool is_write, uint32_t data)
4843 {
4844     uint32_t sizem1;
4845     uint32_t desc = 0;
4846 
4847     /* Assert all of the data fits, with or without MTE enabled. */
4848     assert(nregs >= 1 && nregs <= 4);
4849     sizem1 = (nregs << msz) - 1;
4850     assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT);
4851     assert(data < 1u << SVE_MTEDESC_SHIFT);
4852 
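    /*
     * When MTE is active, pack the MTEDESC fields above SVE_MTEDESC_SHIFT,
     * leaving the low bits for the helper's data value (bounded above).
     */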
4853     if (s->mte_active[0]) {
4854         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
4855         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4856         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4857         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
4858         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1);
4859         desc <<= SVE_MTEDESC_SHIFT;
4860     }
4861     return simd_desc(vsz, vsz, desc | data);
4862 }
4863 
4864 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4865                        int dtype, uint32_t nregs, bool is_write,
4866                        gen_helper_gvec_mem *fn)
4867 {
4868     TCGv_ptr t_pg;
4869     uint32_t desc;
4870 
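    /*
     * With MTE inactive, strip the TBI bits from the address now; with
     * MTE active, the helper performs the tag check itself and must see
     * the original, tagged address.
     */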
4871     if (!s->mte_active[0]) {
4872         addr = clean_data_tbi(s, addr);
4873     }
4874 
4875     /*
4876      * For e.g. LD4, there are not enough arguments to pass all 4
4877      * registers as pointers, so encode the regno into the data field.
4878      * For consistency, do this even for LD1.
4879      */
4880     desc = make_svemte_desc(s, vec_full_reg_size(s), nregs,
4881                             dtype_msz(dtype), is_write, zt);
4882     t_pg = tcg_temp_new_ptr();
4883 
4884     tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
4885     fn(tcg_env, t_pg, addr, tcg_constant_i32(desc));
4886 }
4887 
4888 /* Indexed by [mte][be][dtype][nreg] */
4889 static gen_helper_gvec_mem * const ldr_fns[2][2][19][4] = {
4890     { /* mte inactive, little-endian */
4891       { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4892           gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4893         { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4894         { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4895         { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4896 
4897         { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4898         { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4899           gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4900         { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4901         { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4902 
4903         { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4904         { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4905         { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4906           gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4907         { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4908 
4909         { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4910         { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4911         { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4912         { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4913           gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r },
4914 
4915         { gen_helper_sve_ld1squ_le_r, NULL, NULL, NULL },
4916         { gen_helper_sve_ld1dqu_le_r, NULL, NULL, NULL },
4917         { NULL,                      gen_helper_sve_ld2qq_le_r,
4918           gen_helper_sve_ld3qq_le_r, gen_helper_sve_ld4qq_le_r },
4919       },
4920 
4921       /* mte inactive, big-endian */
4922       { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4923           gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4924         { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4925         { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4926         { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4927 
4928         { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4929         { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4930           gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4931         { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4932         { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4933 
4934         { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4935         { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4936         { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4937           gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4938         { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4939 
4940         { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4941         { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4942         { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4943         { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4944           gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r },
4945 
4946         { gen_helper_sve_ld1squ_be_r, NULL, NULL, NULL },
4947         { gen_helper_sve_ld1dqu_be_r, NULL, NULL, NULL },
4948         { NULL,                      gen_helper_sve_ld2qq_be_r,
4949           gen_helper_sve_ld3qq_be_r, gen_helper_sve_ld4qq_be_r },
4950       },
4951     },
4952 
4953     { /* mte active, little-endian */
4954       { { gen_helper_sve_ld1bb_r_mte,
4955           gen_helper_sve_ld2bb_r_mte,
4956           gen_helper_sve_ld3bb_r_mte,
4957           gen_helper_sve_ld4bb_r_mte },
4958         { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4959         { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4960         { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4961 
4962         { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
4963         { gen_helper_sve_ld1hh_le_r_mte,
4964           gen_helper_sve_ld2hh_le_r_mte,
4965           gen_helper_sve_ld3hh_le_r_mte,
4966           gen_helper_sve_ld4hh_le_r_mte },
4967         { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
4968         { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
4969 
4970         { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
4971         { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
4972         { gen_helper_sve_ld1ss_le_r_mte,
4973           gen_helper_sve_ld2ss_le_r_mte,
4974           gen_helper_sve_ld3ss_le_r_mte,
4975           gen_helper_sve_ld4ss_le_r_mte },
4976         { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
4977 
4978         { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4979         { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4980         { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4981         { gen_helper_sve_ld1dd_le_r_mte,
4982           gen_helper_sve_ld2dd_le_r_mte,
4983           gen_helper_sve_ld3dd_le_r_mte,
4984           gen_helper_sve_ld4dd_le_r_mte },
4985 
4986         { gen_helper_sve_ld1squ_le_r_mte, NULL, NULL, NULL },
4987         { gen_helper_sve_ld1dqu_le_r_mte, NULL, NULL, NULL },
4988         { NULL,
4989           gen_helper_sve_ld2qq_le_r_mte,
4990           gen_helper_sve_ld3qq_le_r_mte,
4991           gen_helper_sve_ld4qq_le_r_mte },
4992       },
4993 
4994       /* mte active, big-endian */
4995       { { gen_helper_sve_ld1bb_r_mte,
4996           gen_helper_sve_ld2bb_r_mte,
4997           gen_helper_sve_ld3bb_r_mte,
4998           gen_helper_sve_ld4bb_r_mte },
4999         { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
5000         { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
5001         { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
5002 
5003         { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
5004         { gen_helper_sve_ld1hh_be_r_mte,
5005           gen_helper_sve_ld2hh_be_r_mte,
5006           gen_helper_sve_ld3hh_be_r_mte,
5007           gen_helper_sve_ld4hh_be_r_mte },
5008         { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
5009         { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
5010 
5011         { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
5012         { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
5013         { gen_helper_sve_ld1ss_be_r_mte,
5014           gen_helper_sve_ld2ss_be_r_mte,
5015           gen_helper_sve_ld3ss_be_r_mte,
5016           gen_helper_sve_ld4ss_be_r_mte },
5017         { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
5018 
5019         { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
5020         { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
5021         { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
5022         { gen_helper_sve_ld1dd_be_r_mte,
5023           gen_helper_sve_ld2dd_be_r_mte,
5024           gen_helper_sve_ld3dd_be_r_mte,
5025           gen_helper_sve_ld4dd_be_r_mte },
5026 
5027         { gen_helper_sve_ld1squ_be_r_mte, NULL, NULL, NULL },
5028         { gen_helper_sve_ld1dqu_be_r_mte, NULL, NULL, NULL },
5029         { NULL,
5030           gen_helper_sve_ld2qq_be_r_mte,
5031           gen_helper_sve_ld3qq_be_r_mte,
5032           gen_helper_sve_ld4qq_be_r_mte },
5033       },
5034     },
5035 };
5036 
5037 static void do_ld_zpa(DisasContext *s, int zt, int pg,
5038                       TCGv_i64 addr, int dtype, int nreg)
5039 {
5040     gen_helper_gvec_mem *fn
5041         = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
5042 
5043     /*
5044      * While there are holes in the table, they are not
5045      * accessible via the instruction encoding.
5046      */
5047     assert(fn != NULL);
5048     do_mem_zpa(s, zt, pg, addr, dtype, nreg + 1, false, fn);
5049 }
5050 
5051 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
5052 {
5053     if (a->rm == 31) {
5054         return false;
5055     }
5056 
5057     /* dtypes 16-18 are artificial, representing 128-bit elements */
5058     switch (a->dtype) {
5059     case 0 ... 15:
5060         if (!dc_isar_feature(aa64_sve, s)) {
5061             return false;
5062         }
5063         break;
5064     case 16: case 17:
5065         if (!dc_isar_feature(aa64_sve2p1, s)) {
5066             return false;
5067         }
5068         s->is_nonstreaming = true;
5069         break;
5070     case 18:
5071         if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
5072             return false;
5073         }
5074         break;
5075     default:
5076         g_assert_not_reached();
5077     }
5078 
5079     if (sve_access_check(s)) {
5080         TCGv_i64 addr = tcg_temp_new_i64();
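        /* addr = Rn + (Rm << msz): the index is scaled by the memory size. */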
5081         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5082         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5083         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5084     }
5085     return true;
5086 }
5087 
5088 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
5089 {
5090     /* dtypes 16-18 are artificial, representing 128-bit elements */
5091     switch (a->dtype) {
5092     case 0 ... 15:
5093         if (!dc_isar_feature(aa64_sve, s)) {
5094             return false;
5095         }
5096         break;
5097     case 16: case 17:
5098         if (!dc_isar_feature(aa64_sve2p1, s)) {
5099             return false;
5100         }
5101         s->is_nonstreaming = true;
5102         break;
5103     case 18:
5104         if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
5105             return false;
5106         }
5107         break;
5108     default:
5109         g_assert_not_reached();
5110     }
5111 
5112     if (sve_access_check(s)) {
5113         int vsz = vec_full_reg_size(s);
5114         int elements = vsz >> dtype_esz[a->dtype];
5115         TCGv_i64 addr = tcg_temp_new_i64();
5116 
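        /*
         * The immediate is scaled by the full transfer size: the
         * per-register element count times the number of registers,
         * shifted by the memory element size.
         */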
5117         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5118                          (a->imm * elements * (a->nreg + 1))
5119                          << dtype_msz(a->dtype));
5120         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5121     }
5122     return true;
5123 }
5124 
5125 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
5126 {
5127     static gen_helper_gvec_mem * const fns[2][2][16] = {
5128         { /* mte inactive, little-endian */
5129           { gen_helper_sve_ldff1bb_r,
5130             gen_helper_sve_ldff1bhu_r,
5131             gen_helper_sve_ldff1bsu_r,
5132             gen_helper_sve_ldff1bdu_r,
5133 
5134             gen_helper_sve_ldff1sds_le_r,
5135             gen_helper_sve_ldff1hh_le_r,
5136             gen_helper_sve_ldff1hsu_le_r,
5137             gen_helper_sve_ldff1hdu_le_r,
5138 
5139             gen_helper_sve_ldff1hds_le_r,
5140             gen_helper_sve_ldff1hss_le_r,
5141             gen_helper_sve_ldff1ss_le_r,
5142             gen_helper_sve_ldff1sdu_le_r,
5143 
5144             gen_helper_sve_ldff1bds_r,
5145             gen_helper_sve_ldff1bss_r,
5146             gen_helper_sve_ldff1bhs_r,
5147             gen_helper_sve_ldff1dd_le_r },
5148 
5149           /* mte inactive, big-endian */
5150           { gen_helper_sve_ldff1bb_r,
5151             gen_helper_sve_ldff1bhu_r,
5152             gen_helper_sve_ldff1bsu_r,
5153             gen_helper_sve_ldff1bdu_r,
5154 
5155             gen_helper_sve_ldff1sds_be_r,
5156             gen_helper_sve_ldff1hh_be_r,
5157             gen_helper_sve_ldff1hsu_be_r,
5158             gen_helper_sve_ldff1hdu_be_r,
5159 
5160             gen_helper_sve_ldff1hds_be_r,
5161             gen_helper_sve_ldff1hss_be_r,
5162             gen_helper_sve_ldff1ss_be_r,
5163             gen_helper_sve_ldff1sdu_be_r,
5164 
5165             gen_helper_sve_ldff1bds_r,
5166             gen_helper_sve_ldff1bss_r,
5167             gen_helper_sve_ldff1bhs_r,
5168             gen_helper_sve_ldff1dd_be_r } },
5169 
5170         { /* mte active, little-endian */
5171           { gen_helper_sve_ldff1bb_r_mte,
5172             gen_helper_sve_ldff1bhu_r_mte,
5173             gen_helper_sve_ldff1bsu_r_mte,
5174             gen_helper_sve_ldff1bdu_r_mte,
5175 
5176             gen_helper_sve_ldff1sds_le_r_mte,
5177             gen_helper_sve_ldff1hh_le_r_mte,
5178             gen_helper_sve_ldff1hsu_le_r_mte,
5179             gen_helper_sve_ldff1hdu_le_r_mte,
5180 
5181             gen_helper_sve_ldff1hds_le_r_mte,
5182             gen_helper_sve_ldff1hss_le_r_mte,
5183             gen_helper_sve_ldff1ss_le_r_mte,
5184             gen_helper_sve_ldff1sdu_le_r_mte,
5185 
5186             gen_helper_sve_ldff1bds_r_mte,
5187             gen_helper_sve_ldff1bss_r_mte,
5188             gen_helper_sve_ldff1bhs_r_mte,
5189             gen_helper_sve_ldff1dd_le_r_mte },
5190 
5191           /* mte active, big-endian */
5192           { gen_helper_sve_ldff1bb_r_mte,
5193             gen_helper_sve_ldff1bhu_r_mte,
5194             gen_helper_sve_ldff1bsu_r_mte,
5195             gen_helper_sve_ldff1bdu_r_mte,
5196 
5197             gen_helper_sve_ldff1sds_be_r_mte,
5198             gen_helper_sve_ldff1hh_be_r_mte,
5199             gen_helper_sve_ldff1hsu_be_r_mte,
5200             gen_helper_sve_ldff1hdu_be_r_mte,
5201 
5202             gen_helper_sve_ldff1hds_be_r_mte,
5203             gen_helper_sve_ldff1hss_be_r_mte,
5204             gen_helper_sve_ldff1ss_be_r_mte,
5205             gen_helper_sve_ldff1sdu_be_r_mte,
5206 
5207             gen_helper_sve_ldff1bds_r_mte,
5208             gen_helper_sve_ldff1bss_r_mte,
5209             gen_helper_sve_ldff1bhs_r_mte,
5210             gen_helper_sve_ldff1dd_be_r_mte } },
5211     };
5212 
5213     if (!dc_isar_feature(aa64_sve, s)) {
5214         return false;
5215     }
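    /* First-fault loads use the FFR, which is absent in streaming mode. */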
5216     s->is_nonstreaming = true;
5217     if (sve_access_check(s)) {
5218         TCGv_i64 addr = tcg_temp_new_i64();
5219         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5220         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5221         do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
5222                    fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
5223     }
5224     return true;
5225 }
5226 
5227 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
5228 {
5229     static gen_helper_gvec_mem * const fns[2][2][16] = {
5230         { /* mte inactive, little-endian */
5231           { gen_helper_sve_ldnf1bb_r,
5232             gen_helper_sve_ldnf1bhu_r,
5233             gen_helper_sve_ldnf1bsu_r,
5234             gen_helper_sve_ldnf1bdu_r,
5235 
5236             gen_helper_sve_ldnf1sds_le_r,
5237             gen_helper_sve_ldnf1hh_le_r,
5238             gen_helper_sve_ldnf1hsu_le_r,
5239             gen_helper_sve_ldnf1hdu_le_r,
5240 
5241             gen_helper_sve_ldnf1hds_le_r,
5242             gen_helper_sve_ldnf1hss_le_r,
5243             gen_helper_sve_ldnf1ss_le_r,
5244             gen_helper_sve_ldnf1sdu_le_r,
5245 
5246             gen_helper_sve_ldnf1bds_r,
5247             gen_helper_sve_ldnf1bss_r,
5248             gen_helper_sve_ldnf1bhs_r,
5249             gen_helper_sve_ldnf1dd_le_r },
5250 
5251           /* mte inactive, big-endian */
5252           { gen_helper_sve_ldnf1bb_r,
5253             gen_helper_sve_ldnf1bhu_r,
5254             gen_helper_sve_ldnf1bsu_r,
5255             gen_helper_sve_ldnf1bdu_r,
5256 
5257             gen_helper_sve_ldnf1sds_be_r,
5258             gen_helper_sve_ldnf1hh_be_r,
5259             gen_helper_sve_ldnf1hsu_be_r,
5260             gen_helper_sve_ldnf1hdu_be_r,
5261 
5262             gen_helper_sve_ldnf1hds_be_r,
5263             gen_helper_sve_ldnf1hss_be_r,
5264             gen_helper_sve_ldnf1ss_be_r,
5265             gen_helper_sve_ldnf1sdu_be_r,
5266 
5267             gen_helper_sve_ldnf1bds_r,
5268             gen_helper_sve_ldnf1bss_r,
5269             gen_helper_sve_ldnf1bhs_r,
5270             gen_helper_sve_ldnf1dd_be_r } },
5271 
5272         { /* mte active, little-endian */
5273           { gen_helper_sve_ldnf1bb_r_mte,
5274             gen_helper_sve_ldnf1bhu_r_mte,
5275             gen_helper_sve_ldnf1bsu_r_mte,
5276             gen_helper_sve_ldnf1bdu_r_mte,
5277 
5278             gen_helper_sve_ldnf1sds_le_r_mte,
5279             gen_helper_sve_ldnf1hh_le_r_mte,
5280             gen_helper_sve_ldnf1hsu_le_r_mte,
5281             gen_helper_sve_ldnf1hdu_le_r_mte,
5282 
5283             gen_helper_sve_ldnf1hds_le_r_mte,
5284             gen_helper_sve_ldnf1hss_le_r_mte,
5285             gen_helper_sve_ldnf1ss_le_r_mte,
5286             gen_helper_sve_ldnf1sdu_le_r_mte,
5287 
5288             gen_helper_sve_ldnf1bds_r_mte,
5289             gen_helper_sve_ldnf1bss_r_mte,
5290             gen_helper_sve_ldnf1bhs_r_mte,
5291             gen_helper_sve_ldnf1dd_le_r_mte },
5292 
5293           /* mte active, big-endian */
5294           { gen_helper_sve_ldnf1bb_r_mte,
5295             gen_helper_sve_ldnf1bhu_r_mte,
5296             gen_helper_sve_ldnf1bsu_r_mte,
5297             gen_helper_sve_ldnf1bdu_r_mte,
5298 
5299             gen_helper_sve_ldnf1sds_be_r_mte,
5300             gen_helper_sve_ldnf1hh_be_r_mte,
5301             gen_helper_sve_ldnf1hsu_be_r_mte,
5302             gen_helper_sve_ldnf1hdu_be_r_mte,
5303 
5304             gen_helper_sve_ldnf1hds_be_r_mte,
5305             gen_helper_sve_ldnf1hss_be_r_mte,
5306             gen_helper_sve_ldnf1ss_be_r_mte,
5307             gen_helper_sve_ldnf1sdu_be_r_mte,
5308 
5309             gen_helper_sve_ldnf1bds_r_mte,
5310             gen_helper_sve_ldnf1bss_r_mte,
5311             gen_helper_sve_ldnf1bhs_r_mte,
5312             gen_helper_sve_ldnf1dd_be_r_mte } },
5313     };
5314 
5315     if (!dc_isar_feature(aa64_sve, s)) {
5316         return false;
5317     }
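    /* Non-fault loads likewise use the FFR and are non-streaming. */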
5318     s->is_nonstreaming = true;
5319     if (sve_access_check(s)) {
5320         int vsz = vec_full_reg_size(s);
5321         int elements = vsz >> dtype_esz[a->dtype];
5322         int off = (a->imm * elements) << dtype_msz(a->dtype);
5323         TCGv_i64 addr = tcg_temp_new_i64();
5324 
5325         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
5326         do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
5327                    fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
5328     }
5329     return true;
5330 }
5331 
5332 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
5333 {
5334     unsigned vsz = vec_full_reg_size(s);
5335     TCGv_ptr t_pg;
5336     int poff;
5337     uint32_t desc;
5338 
5339     /* Load the first quadword using the normal predicated load helpers.  */
5340     if (!s->mte_active[0]) {
5341         addr = clean_data_tbi(s, addr);
5342     }
5343 
5344     poff = pred_full_reg_offset(s, pg);
5345     if (vsz > 16) {
5346         /*
5347          * Zero-extend the first 16 bits of the predicate into a temporary.
5348          * This avoids triggering the assertion that no predicate bits
5349          * are set beyond VQ, since we have effectively lowered VQ to 1
5350          * for this load operation.
5351          */
5352         TCGv_i64 tmp = tcg_temp_new_i64();
5353 #if HOST_BIG_ENDIAN
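        /* The low 16 predicate bits are at byte offset 6 on a BE host. */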
5354         poff += 6;
5355 #endif
5356         tcg_gen_ld16u_i64(tmp, tcg_env, poff);
5357 
5358         poff = offsetof(CPUARMState, vfp.preg_tmp);
5359         tcg_gen_st_i64(tmp, tcg_env, poff);
5360     }
5361 
5362     t_pg = tcg_temp_new_ptr();
5363     tcg_gen_addi_ptr(t_pg, tcg_env, poff);
5364 
5365     gen_helper_gvec_mem *fn
5366         = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
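    /* Run the normal helper over exactly one quadword (vsz == 16). */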
5367     desc = make_svemte_desc(s, 16, 1, dtype_msz(dtype), false, zt);
5368     fn(tcg_env, t_pg, addr, tcg_constant_i32(desc));
5369 
5370     /* Replicate that first quadword.  */
5371     if (vsz > 16) {
5372         int doff = vec_full_reg_offset(s, zt);
5373         tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
5374     }
5375 }
5376 
5377 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
5378 {
5379     if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
5380         return false;
5381     }
5382     if (sve_access_check(s)) {
5383         int msz = dtype_msz(a->dtype);
5384         TCGv_i64 addr = tcg_temp_new_i64();
5385         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5386         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5387         do_ldrq(s, a->rd, a->pg, addr, a->dtype);
5388     }
5389     return true;
5390 }
5391 
5392 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
5393 {
5394     if (!dc_isar_feature(aa64_sve, s)) {
5395         return false;
5396     }
5397     if (sve_access_check(s)) {
5398         TCGv_i64 addr = tcg_temp_new_i64();
5399         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
5400         do_ldrq(s, a->rd, a->pg, addr, a->dtype);
5401     }
5402     return true;
5403 }
5404 
5405 static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
5406 {
5407     unsigned vsz = vec_full_reg_size(s);
5408     unsigned vsz_r32;
5409     TCGv_ptr t_pg;
5410     int poff, doff;
5411     uint32_t desc;
5412 
5413     if (vsz < 32) {
5414         /*
5415          * Note that this UNDEFINED check comes after CheckSVEEnabled()
5416          * in the ARM pseudocode, which is the sve_access_check() done
5417          * in our caller, so the caller must not now return false.
5418          */
5419         unallocated_encoding(s);
5420         return;
5421     }
5422 
5423     /* Load the first octaword using the normal predicated load helpers.  */
5424     if (!s->mte_active[0]) {
5425         addr = clean_data_tbi(s, addr);
5426     }
5427 
5428     poff = pred_full_reg_offset(s, pg);
5429     if (vsz > 32) {
5430         /*
5431          * Zero-extend the first 32 bits of the predicate into a temporary.
5432          * This avoids triggering the assertion that no predicate bits
5433          * are set beyond VQ, since we have effectively lowered VQ to 2
5434          * for this load operation.
5435          */
5436         TCGv_i64 tmp = tcg_temp_new_i64();
5437 #if HOST_BIG_ENDIAN
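        /* The low 32 predicate bits are at byte offset 4 on a BE host. */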
5438         poff += 4;
5439 #endif
5440         tcg_gen_ld32u_i64(tmp, tcg_env, poff);
5441 
5442         poff = offsetof(CPUARMState, vfp.preg_tmp);
5443         tcg_gen_st_i64(tmp, tcg_env, poff);
5444     }
5445 
5446     t_pg = tcg_temp_new_ptr();
5447     tcg_gen_addi_ptr(t_pg, tcg_env, poff);
5448 
5449     gen_helper_gvec_mem *fn
5450         = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
5451     desc = make_svemte_desc(s, 32, 1, dtype_msz(dtype), false, zt);
5452     fn(tcg_env, t_pg, addr, tcg_constant_i32(desc));
5453 
5454     /*
5455      * Replicate that first octaword.
5456      * The replication happens in units of 32; if the full vector size
5457      * is not a multiple of 32, the final bits are zeroed.
5458      */
5459     doff = vec_full_reg_offset(s, zt);
5460     vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
5461     if (vsz >= 64) {
5462         tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
5463     }
5464     vsz -= vsz_r32;
5465     if (vsz) {
5466         tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
5467     }
5468 }
5469 
5470 static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5471 {
5472     if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5473         return false;
5474     }
5475     if (a->rm == 31) {
5476         return false;
5477     }
5478     s->is_nonstreaming = true;
5479     if (sve_access_check(s)) {
5480         TCGv_i64 addr = tcg_temp_new_i64();
5481         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5482         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5483         do_ldro(s, a->rd, a->pg, addr, a->dtype);
5484     }
5485     return true;
5486 }
5487 
5488 static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5489 {
5490     if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5491         return false;
5492     }
5493     s->is_nonstreaming = true;
5494     if (sve_access_check(s)) {
5495         TCGv_i64 addr = tcg_temp_new_i64();
5496         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5497         do_ldro(s, a->rd, a->pg, addr, a->dtype);
5498     }
5499     return true;
5500 }
5501 
5502 /* Load and broadcast element.  */
5503 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
5504 {
5505     unsigned vsz = vec_full_reg_size(s);
5506     unsigned psz = pred_full_reg_size(s);
5507     unsigned esz = dtype_esz[a->dtype];
5508     unsigned msz = dtype_msz(a->dtype);
5509     TCGLabel *over;
5510     TCGv_i64 temp, clean_addr;
5511     MemOp memop;
5512 
5513     if (!dc_isar_feature(aa64_sve, s)) {
5514         return false;
5515     }
5516     if (!sve_access_check(s)) {
5517         return true;
5518     }
5519 
5520     over = gen_new_label();
5521 
5522     /* If the guarding predicate has no bits set, no load occurs.  */
5523     if (psz <= 8) {
5524         /* Narrow the pred_esz_masks value simply to shrink the
5525          * size of the code generated here.
5526          */
5527         uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
5528         temp = tcg_temp_new_i64();
5529         tcg_gen_ld_i64(temp, tcg_env, pred_full_reg_offset(s, a->pg));
5530         tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
5531         tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5532     } else {
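        /* For wider predicates, search for any active element. */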
5533         TCGv_i32 t32 = tcg_temp_new_i32();
5534         find_last_active(s, t32, esz, a->pg);
5535         tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5536     }
5537 
5538     /* Load the data.  */
5539     temp = tcg_temp_new_i64();
5540     tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
5541 
5542     memop = finalize_memop(s, dtype_mop[a->dtype]);
5543     clean_addr = gen_mte_check1(s, temp, false, true, memop);
5544     tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop);
5545 
5546     /* Broadcast to *all* elements.  */
5547     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5548                          vsz, vsz, temp);
5549 
5550     /* Zero the inactive elements.  */
5551     gen_set_label(over);
5552     return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
5553 }
5554 
5555 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5556                       int msz, int esz, int nreg)
5557 {
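    /* Indexed by [mte][be][msz][esz].  */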
5558     static gen_helper_gvec_mem * const fn_single[2][2][4][5] = {
5559         { { { gen_helper_sve_st1bb_r,
5560               gen_helper_sve_st1bh_r,
5561               gen_helper_sve_st1bs_r,
5562               gen_helper_sve_st1bd_r },
5563             { NULL,
5564               gen_helper_sve_st1hh_le_r,
5565               gen_helper_sve_st1hs_le_r,
5566               gen_helper_sve_st1hd_le_r },
5567             { NULL, NULL,
5568               gen_helper_sve_st1ss_le_r,
5569               gen_helper_sve_st1sd_le_r,
5570               gen_helper_sve_st1sq_le_r, },
5571             { NULL, NULL, NULL,
5572               gen_helper_sve_st1dd_le_r,
5573               gen_helper_sve_st1dq_le_r, } },
5574           { { gen_helper_sve_st1bb_r,
5575               gen_helper_sve_st1bh_r,
5576               gen_helper_sve_st1bs_r,
5577               gen_helper_sve_st1bd_r },
5578             { NULL,
5579               gen_helper_sve_st1hh_be_r,
5580               gen_helper_sve_st1hs_be_r,
5581               gen_helper_sve_st1hd_be_r },
5582             { NULL, NULL,
5583               gen_helper_sve_st1ss_be_r,
5584               gen_helper_sve_st1sd_be_r,
5585               gen_helper_sve_st1sq_be_r },
5586             { NULL, NULL, NULL,
5587               gen_helper_sve_st1dd_be_r,
5588               gen_helper_sve_st1dq_be_r } } },
5589 
5590         { { { gen_helper_sve_st1bb_r_mte,
5591               gen_helper_sve_st1bh_r_mte,
5592               gen_helper_sve_st1bs_r_mte,
5593               gen_helper_sve_st1bd_r_mte },
5594             { NULL,
5595               gen_helper_sve_st1hh_le_r_mte,
5596               gen_helper_sve_st1hs_le_r_mte,
5597               gen_helper_sve_st1hd_le_r_mte },
5598             { NULL, NULL,
5599               gen_helper_sve_st1ss_le_r_mte,
5600               gen_helper_sve_st1sd_le_r_mte,
5601               gen_helper_sve_st1sq_le_r_mte },
5602             { NULL, NULL, NULL,
5603               gen_helper_sve_st1dd_le_r_mte,
5604               gen_helper_sve_st1dq_le_r_mte } },
5605           { { gen_helper_sve_st1bb_r_mte,
5606               gen_helper_sve_st1bh_r_mte,
5607               gen_helper_sve_st1bs_r_mte,
5608               gen_helper_sve_st1bd_r_mte },
5609             { NULL,
5610               gen_helper_sve_st1hh_be_r_mte,
5611               gen_helper_sve_st1hs_be_r_mte,
5612               gen_helper_sve_st1hd_be_r_mte },
5613             { NULL, NULL,
5614               gen_helper_sve_st1ss_be_r_mte,
5615               gen_helper_sve_st1sd_be_r_mte,
5616               gen_helper_sve_st1sq_be_r_mte },
5617             { NULL, NULL, NULL,
5618               gen_helper_sve_st1dd_be_r_mte,
5619               gen_helper_sve_st1dq_be_r_mte } } },
5620     };
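    /* Indexed by [mte][be][nreg - 1][msz].  */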
5621     static gen_helper_gvec_mem * const fn_multiple[2][2][3][5] = {
5622         { { { gen_helper_sve_st2bb_r,
5623               gen_helper_sve_st2hh_le_r,
5624               gen_helper_sve_st2ss_le_r,
5625               gen_helper_sve_st2dd_le_r,
5626               gen_helper_sve_st2qq_le_r },
5627             { gen_helper_sve_st3bb_r,
5628               gen_helper_sve_st3hh_le_r,
5629               gen_helper_sve_st3ss_le_r,
5630               gen_helper_sve_st3dd_le_r,
5631               gen_helper_sve_st3qq_le_r },
5632             { gen_helper_sve_st4bb_r,
5633               gen_helper_sve_st4hh_le_r,
5634               gen_helper_sve_st4ss_le_r,
5635               gen_helper_sve_st4dd_le_r,
5636               gen_helper_sve_st4qq_le_r } },
5637           { { gen_helper_sve_st2bb_r,
5638               gen_helper_sve_st2hh_be_r,
5639               gen_helper_sve_st2ss_be_r,
5640               gen_helper_sve_st2dd_be_r,
5641               gen_helper_sve_st2qq_be_r },
5642             { gen_helper_sve_st3bb_r,
5643               gen_helper_sve_st3hh_be_r,
5644               gen_helper_sve_st3ss_be_r,
5645               gen_helper_sve_st3dd_be_r,
5646               gen_helper_sve_st3qq_be_r },
5647             { gen_helper_sve_st4bb_r,
5648               gen_helper_sve_st4hh_be_r,
5649               gen_helper_sve_st4ss_be_r,
5650               gen_helper_sve_st4dd_be_r,
5651               gen_helper_sve_st4qq_be_r } } },
5652         { { { gen_helper_sve_st2bb_r_mte,
5653               gen_helper_sve_st2hh_le_r_mte,
5654               gen_helper_sve_st2ss_le_r_mte,
5655               gen_helper_sve_st2dd_le_r_mte,
5656               gen_helper_sve_st2qq_le_r_mte },
5657             { gen_helper_sve_st3bb_r_mte,
5658               gen_helper_sve_st3hh_le_r_mte,
5659               gen_helper_sve_st3ss_le_r_mte,
5660               gen_helper_sve_st3dd_le_r_mte,
5661               gen_helper_sve_st3qq_le_r_mte },
5662             { gen_helper_sve_st4bb_r_mte,
5663               gen_helper_sve_st4hh_le_r_mte,
5664               gen_helper_sve_st4ss_le_r_mte,
5665               gen_helper_sve_st4dd_le_r_mte,
5666               gen_helper_sve_st4qq_le_r_mte } },
5667           { { gen_helper_sve_st2bb_r_mte,
5668               gen_helper_sve_st2hh_be_r_mte,
5669               gen_helper_sve_st2ss_be_r_mte,
5670               gen_helper_sve_st2dd_be_r_mte,
5671               gen_helper_sve_st2qq_be_r_mte },
5672             { gen_helper_sve_st3bb_r_mte,
5673               gen_helper_sve_st3hh_be_r_mte,
5674               gen_helper_sve_st3ss_be_r_mte,
5675               gen_helper_sve_st3dd_be_r_mte,
5676               gen_helper_sve_st3qq_be_r_mte },
5677             { gen_helper_sve_st4bb_r_mte,
5678               gen_helper_sve_st4hh_be_r_mte,
5679               gen_helper_sve_st4ss_be_r_mte,
5680               gen_helper_sve_st4dd_be_r_mte,
5681               gen_helper_sve_st4qq_be_r_mte } } },
5682     };
5683     gen_helper_gvec_mem *fn;
5684     int be = s->be_data == MO_BE;
5685 
5686     if (nreg == 0) {
5687         /* ST1 */
5688         fn = fn_single[s->mte_active[0]][be][msz][esz];
5689     } else {
5690         /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5691         assert(msz == esz);
5692         fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
5693     }
5694     assert(fn != NULL);
5695     do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg + 1, true, fn);
5696 }
5697 
5698 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
5699 {
5700     if (a->rm == 31 || a->msz > a->esz) {
5701         return false;
5702     }
5703     switch (a->esz) {
5704     case MO_8 ... MO_64:
5705         if (!dc_isar_feature(aa64_sve, s)) {
5706             return false;
5707         }
5708         break;
5709     case MO_128:
5710         if (a->nreg == 0) {
5711             assert(a->msz < a->esz);
5712             if (!dc_isar_feature(aa64_sve2p1, s)) {
5713                 return false;
5714             }
5715             s->is_nonstreaming = true;
5716         } else {
5717             if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
5718                 return false;
5719             }
5720         }
5721         break;
5722     default:
5723         g_assert_not_reached();
5724     }
5725 
5726     if (sve_access_check(s)) {
5727         TCGv_i64 addr = tcg_temp_new_i64();
5728         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5729         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5730         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5731     }
5732     return true;
5733 }
5734 
5735 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
5736 {
5737     if (a->msz > a->esz) {
5738         return false;
5739     }
5740     switch (a->esz) {
5741     case MO_8 ... MO_64:
5742         if (!dc_isar_feature(aa64_sve, s)) {
5743             return false;
5744         }
5745         break;
5746     case MO_128:
5747         if (a->nreg == 0) {
5748             assert(a->msz < a->esz);
5749             if (!dc_isar_feature(aa64_sve2p1, s)) {
5750                 return false;
5751             }
5752             s->is_nonstreaming = true;
5753         } else {
5754             if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
5755                 return false;
5756             }
5757         }
5758         break;
5759     default:
5760         g_assert_not_reached();
5761     }
5762 
5763     if (sve_access_check(s)) {
5764         int vsz = vec_full_reg_size(s);
5765         int elements = vsz >> a->esz;
5766         TCGv_i64 addr = tcg_temp_new_i64();
5767 
5768         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5769                          (a->imm * elements * (a->nreg + 1)) << a->msz);
5770         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5771     }
5772     return true;
5773 }
5774 
5775 /*
5776  *** SVE gather loads / scatter stores
5777  */
5778 
5779 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
5780                        int scale, TCGv_i64 scalar, int msz, bool is_write,
5781                        gen_helper_gvec_mem_scatter *fn)
5782 {
5783     TCGv_ptr t_zm = tcg_temp_new_ptr();
5784     TCGv_ptr t_pg = tcg_temp_new_ptr();
5785     TCGv_ptr t_zt = tcg_temp_new_ptr();
5786     uint32_t desc;
5787 
5788     tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
5789     tcg_gen_addi_ptr(t_zm, tcg_env, vec_full_reg_offset(s, zm));
5790     tcg_gen_addi_ptr(t_zt, tcg_env, vec_full_reg_offset(s, zt));
5791 
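    /* The offset scale is passed to the helper via the descriptor data. */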
5792     desc = make_svemte_desc(s, vec_full_reg_size(s), 1, msz, is_write, scale);
5793     fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));
5794 }
5795 
5796 /* Indexed by [mte][be][ff][xs][u][msz].  */
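/* xs selects the offset extension (zsu vs zss); u=0 signed, u=1 unsigned. */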
5797 static gen_helper_gvec_mem_scatter * const
5798 gather_load_fn32[2][2][2][2][2][3] = {
5799     { /* MTE Inactive */
5800         { /* Little-endian */
5801             { { { gen_helper_sve_ldbss_zsu,
5802                   gen_helper_sve_ldhss_le_zsu,
5803                   NULL, },
5804                 { gen_helper_sve_ldbsu_zsu,
5805                   gen_helper_sve_ldhsu_le_zsu,
5806                   gen_helper_sve_ldss_le_zsu, } },
5807               { { gen_helper_sve_ldbss_zss,
5808                   gen_helper_sve_ldhss_le_zss,
5809                   NULL, },
5810                 { gen_helper_sve_ldbsu_zss,
5811                   gen_helper_sve_ldhsu_le_zss,
5812                   gen_helper_sve_ldss_le_zss, } } },
5813 
5814             /* First-fault */
5815             { { { gen_helper_sve_ldffbss_zsu,
5816                   gen_helper_sve_ldffhss_le_zsu,
5817                   NULL, },
5818                 { gen_helper_sve_ldffbsu_zsu,
5819                   gen_helper_sve_ldffhsu_le_zsu,
5820                   gen_helper_sve_ldffss_le_zsu, } },
5821               { { gen_helper_sve_ldffbss_zss,
5822                   gen_helper_sve_ldffhss_le_zss,
5823                   NULL, },
5824                 { gen_helper_sve_ldffbsu_zss,
5825                   gen_helper_sve_ldffhsu_le_zss,
5826                   gen_helper_sve_ldffss_le_zss, } } } },
5827 
5828         { /* Big-endian */
5829             { { { gen_helper_sve_ldbss_zsu,
5830                   gen_helper_sve_ldhss_be_zsu,
5831                   NULL, },
5832                 { gen_helper_sve_ldbsu_zsu,
5833                   gen_helper_sve_ldhsu_be_zsu,
5834                   gen_helper_sve_ldss_be_zsu, } },
5835               { { gen_helper_sve_ldbss_zss,
5836                   gen_helper_sve_ldhss_be_zss,
5837                   NULL, },
5838                 { gen_helper_sve_ldbsu_zss,
5839                   gen_helper_sve_ldhsu_be_zss,
5840                   gen_helper_sve_ldss_be_zss, } } },
5841 
5842             /* First-fault */
5843             { { { gen_helper_sve_ldffbss_zsu,
5844                   gen_helper_sve_ldffhss_be_zsu,
5845                   NULL, },
5846                 { gen_helper_sve_ldffbsu_zsu,
5847                   gen_helper_sve_ldffhsu_be_zsu,
5848                   gen_helper_sve_ldffss_be_zsu, } },
5849               { { gen_helper_sve_ldffbss_zss,
5850                   gen_helper_sve_ldffhss_be_zss,
5851                   NULL, },
5852                 { gen_helper_sve_ldffbsu_zss,
5853                   gen_helper_sve_ldffhsu_be_zss,
5854                   gen_helper_sve_ldffss_be_zss, } } } } },
5855     { /* MTE Active */
5856         { /* Little-endian */
5857             { { { gen_helper_sve_ldbss_zsu_mte,
5858                   gen_helper_sve_ldhss_le_zsu_mte,
5859                   NULL, },
5860                 { gen_helper_sve_ldbsu_zsu_mte,
5861                   gen_helper_sve_ldhsu_le_zsu_mte,
5862                   gen_helper_sve_ldss_le_zsu_mte, } },
5863               { { gen_helper_sve_ldbss_zss_mte,
5864                   gen_helper_sve_ldhss_le_zss_mte,
5865                   NULL, },
5866                 { gen_helper_sve_ldbsu_zss_mte,
5867                   gen_helper_sve_ldhsu_le_zss_mte,
5868                   gen_helper_sve_ldss_le_zss_mte, } } },
5869 
5870             /* First-fault */
5871             { { { gen_helper_sve_ldffbss_zsu_mte,
5872                   gen_helper_sve_ldffhss_le_zsu_mte,
5873                   NULL, },
5874                 { gen_helper_sve_ldffbsu_zsu_mte,
5875                   gen_helper_sve_ldffhsu_le_zsu_mte,
5876                   gen_helper_sve_ldffss_le_zsu_mte, } },
5877               { { gen_helper_sve_ldffbss_zss_mte,
5878                   gen_helper_sve_ldffhss_le_zss_mte,
5879                   NULL, },
5880                 { gen_helper_sve_ldffbsu_zss_mte,
5881                   gen_helper_sve_ldffhsu_le_zss_mte,
5882                   gen_helper_sve_ldffss_le_zss_mte, } } } },
5883 
5884         { /* Big-endian */
5885             { { { gen_helper_sve_ldbss_zsu_mte,
5886                   gen_helper_sve_ldhss_be_zsu_mte,
5887                   NULL, },
5888                 { gen_helper_sve_ldbsu_zsu_mte,
5889                   gen_helper_sve_ldhsu_be_zsu_mte,
5890                   gen_helper_sve_ldss_be_zsu_mte, } },
5891               { { gen_helper_sve_ldbss_zss_mte,
5892                   gen_helper_sve_ldhss_be_zss_mte,
5893                   NULL, },
5894                 { gen_helper_sve_ldbsu_zss_mte,
5895                   gen_helper_sve_ldhsu_be_zss_mte,
5896                   gen_helper_sve_ldss_be_zss_mte, } } },
5897 
5898             /* First-fault */
5899             { { { gen_helper_sve_ldffbss_zsu_mte,
5900                   gen_helper_sve_ldffhss_be_zsu_mte,
5901                   NULL, },
5902                 { gen_helper_sve_ldffbsu_zsu_mte,
5903                   gen_helper_sve_ldffhsu_be_zsu_mte,
5904                   gen_helper_sve_ldffss_be_zsu_mte, } },
5905               { { gen_helper_sve_ldffbss_zss_mte,
5906                   gen_helper_sve_ldffhss_be_zss_mte,
5907                   NULL, },
5908                 { gen_helper_sve_ldffbsu_zss_mte,
5909                   gen_helper_sve_ldffhsu_be_zss_mte,
5910                   gen_helper_sve_ldffss_be_zss_mte, } } } } },
5911 };
5912 
5913 /* Note that we overload xs=2 to indicate 64-bit offset.  */
5914 static gen_helper_gvec_mem_scatter * const
5915 gather_load_fn64[2][2][2][3][2][4] = {
5916     { /* MTE Inactive */
5917         { /* Little-endian */
5918             { { { gen_helper_sve_ldbds_zsu,
5919                   gen_helper_sve_ldhds_le_zsu,
5920                   gen_helper_sve_ldsds_le_zsu,
5921                   NULL, },
5922                 { gen_helper_sve_ldbdu_zsu,
5923                   gen_helper_sve_ldhdu_le_zsu,
5924                   gen_helper_sve_ldsdu_le_zsu,
5925                   gen_helper_sve_lddd_le_zsu, } },
5926               { { gen_helper_sve_ldbds_zss,
5927                   gen_helper_sve_ldhds_le_zss,
5928                   gen_helper_sve_ldsds_le_zss,
5929                   NULL, },
5930                 { gen_helper_sve_ldbdu_zss,
5931                   gen_helper_sve_ldhdu_le_zss,
5932                   gen_helper_sve_ldsdu_le_zss,
5933                   gen_helper_sve_lddd_le_zss, } },
5934               { { gen_helper_sve_ldbds_zd,
5935                   gen_helper_sve_ldhds_le_zd,
5936                   gen_helper_sve_ldsds_le_zd,
5937                   NULL, },
5938                 { gen_helper_sve_ldbdu_zd,
5939                   gen_helper_sve_ldhdu_le_zd,
5940                   gen_helper_sve_ldsdu_le_zd,
5941                   gen_helper_sve_lddd_le_zd, } } },
5942 
5943             /* First-fault */
5944             { { { gen_helper_sve_ldffbds_zsu,
5945                   gen_helper_sve_ldffhds_le_zsu,
5946                   gen_helper_sve_ldffsds_le_zsu,
5947                   NULL, },
5948                 { gen_helper_sve_ldffbdu_zsu,
5949                   gen_helper_sve_ldffhdu_le_zsu,
                  gen_helper_sve_ldffsdu_le_zsu,
                  gen_helper_sve_ldffdd_le_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_le_zss,
                  gen_helper_sve_ldffsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_le_zss,
                  gen_helper_sve_ldffsdu_le_zss,
                  gen_helper_sve_ldffdd_le_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_le_zd,
                  gen_helper_sve_ldffsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_le_zd,
                  gen_helper_sve_ldffsdu_le_zd,
                  gen_helper_sve_ldffdd_le_zd, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_be_zsu,
                  gen_helper_sve_ldsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_be_zsu,
                  gen_helper_sve_ldsdu_be_zsu,
                  gen_helper_sve_lddd_be_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_be_zss,
                  gen_helper_sve_ldsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_be_zss,
                  gen_helper_sve_ldsdu_be_zss,
                  gen_helper_sve_lddd_be_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_be_zd,
                  gen_helper_sve_ldsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_be_zd,
                  gen_helper_sve_ldsdu_be_zd,
                  gen_helper_sve_lddd_be_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_be_zsu,
                  gen_helper_sve_ldffsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_be_zsu,
                  gen_helper_sve_ldffsdu_be_zsu,
                  gen_helper_sve_ldffdd_be_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_be_zss,
                  gen_helper_sve_ldffsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_be_zss,
                  gen_helper_sve_ldffsdu_be_zss,
                  gen_helper_sve_ldffdd_be_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_be_zd,
                  gen_helper_sve_ldffsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_be_zd,
                  gen_helper_sve_ldffsdu_be_zd,
                  gen_helper_sve_ldffdd_be_zd, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_le_zsu_mte,
                  gen_helper_sve_ldsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_le_zsu_mte,
                  gen_helper_sve_ldsdu_le_zsu_mte,
                  gen_helper_sve_lddd_le_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_le_zss_mte,
                  gen_helper_sve_ldsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_le_zss_mte,
                  gen_helper_sve_ldsdu_le_zss_mte,
                  gen_helper_sve_lddd_le_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_le_zd_mte,
                  gen_helper_sve_ldsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_le_zd_mte,
                  gen_helper_sve_ldsdu_le_zd_mte,
                  gen_helper_sve_lddd_le_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_le_zsu_mte,
                  gen_helper_sve_ldffsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_le_zsu_mte,
                  gen_helper_sve_ldffsdu_le_zsu_mte,
                  gen_helper_sve_ldffdd_le_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_le_zss_mte,
                  gen_helper_sve_ldffsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_le_zss_mte,
                  gen_helper_sve_ldffsdu_le_zss_mte,
                  gen_helper_sve_ldffdd_le_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_le_zd_mte,
                  gen_helper_sve_ldffsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_le_zd_mte,
                  gen_helper_sve_ldffsdu_le_zd_mte,
                  gen_helper_sve_ldffdd_le_zd_mte, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_be_zsu_mte,
                  gen_helper_sve_ldsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_be_zsu_mte,
                  gen_helper_sve_ldsdu_be_zsu_mte,
                  gen_helper_sve_lddd_be_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_be_zss_mte,
                  gen_helper_sve_ldsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_be_zss_mte,
                  gen_helper_sve_ldsdu_be_zss_mte,
                  gen_helper_sve_lddd_be_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_be_zd_mte,
                  gen_helper_sve_ldsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_be_zd_mte,
                  gen_helper_sve_ldsdu_be_zd_mte,
                  gen_helper_sve_lddd_be_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_be_zsu_mte,
                  gen_helper_sve_ldffsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_be_zsu_mte,
                  gen_helper_sve_ldffsdu_be_zsu_mte,
                  gen_helper_sve_ldffdd_be_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_be_zss_mte,
                  gen_helper_sve_ldffsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_be_zss_mte,
                  gen_helper_sve_ldffsdu_be_zss_mte,
                  gen_helper_sve_ldffdd_be_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_be_zd_mte,
                  gen_helper_sve_ldffsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_be_zd_mte,
                  gen_helper_sve_ldffsdu_be_zd_mte,
                  gen_helper_sve_ldffdd_be_zd_mte, } } } } },
};

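/* Indexed by [mte][be].  */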
static gen_helper_gvec_mem_scatter * const
gather_load_fn128[2][2] = {
    { gen_helper_sve_ldqq_le_zd,
      gen_helper_sve_ldqq_be_zd },
    { gen_helper_sve_ldqq_le_zd_mte,
      gen_helper_sve_ldqq_be_zd_mte }
};

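/*
 * Note that a->scale below is the opcode's scaled-offset flag, so the
 * shift applied to each vector offset element is a->scale * a->msz,
 * i.e. log2(msize) for the scaled form and 0 otherwise.  The 128-bit
 * (LD1Q) form requires SVE2p1; the others require only SVE.
 */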
static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < MO_128
        ? !dc_isar_feature(aa64_sve, s)
        : !dc_isar_feature(aa64_sve2p1, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
        break;
    case MO_128:
        assert(!a->ff && a->u && a->xs == 2 && a->msz == MO_128);
        fn = gather_load_fn128[mte][be];
        break;
    default:
        g_assert_not_reached();
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, false, fn);
    return true;
}

static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
    return true;
}

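/*
 * The element size must be able to hold the loaded value:
 * a->esz < a->msz + !a->u rejects both msize > esize and the
 * sign-extending forms with msize == esize.
 */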
static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz + !a->u) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, false, fn);
    return true;
}

/* Indexed by [mte][be][xs][msz].  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset.  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};

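/* Indexed by [mte][be].  */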
static gen_helper_gvec_mem_scatter * const
scatter_store_fn128[2][2] = {
    { gen_helper_sve_stqq_le_zd,
      gen_helper_sve_stqq_be_zd },
    { gen_helper_sve_stqq_le_zd_mte,
      gen_helper_sve_stqq_be_zd_mte }
};

static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (a->esz < MO_128
        ? !dc_isar_feature(aa64_sve, s)
        : !dc_isar_feature(aa64_sve2p1, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][a->xs][a->msz];
        break;
    case MO_128:
        assert(a->xs == 2 && a->msz == MO_128);
        fn = scatter_store_fn128[mte][be];
        break;
    default:
        g_assert_not_reached();
    }
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, true, fn);
    return true;
}

static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
    return true;
}

static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    default:
        g_assert_not_reached();
    }

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, true, fn);
    return true;
}

/*
 * Prefetches
 */

static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    s->is_nonstreaming = true;
    (void)sve_access_check(s);
    return true;
}

/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */

TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn)
TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz)
TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)

/*
 * SVE2 Integer Multiply - Unpredicated
 */

TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)
TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_sve2_sqdmulh, a)

static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
    gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
    gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
};
TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
    gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
    gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
};
TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umulh_zzz_fns[a->esz], a, 0)

TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           gen_helper_gvec_pmul_b, a, 0)

static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
    gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
};
TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqrdmulh_zzz_fns[a->esz], a, 0)

/*
 * SVE2 Integer - Predicated
 */

static gen_helper_gvec_4 * const sadlp_fns[4] = {
    NULL,                          gen_helper_sve2_sadalp_zpzz_h,
    gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
};
TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           sadlp_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const uadlp_fns[4] = {
    NULL,                          gen_helper_sve2_uadalp_zpzz_h,
    gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
};
TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           uadlp_fns[a->esz], a, 0)

/*
 * SVE2 integer unary operations (predicated)
 */

TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)

TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)

static gen_helper_gvec_3 * const sqabs_fns[4] = {
    gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
    gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
};
TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqneg_fns[4] = {
    gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
    gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
};
TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)

DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)

DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)

DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)

DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)

DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)

DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)

/*
 * SVE2 Widening Integer Arithmetic
 */

static gen_helper_gvec_3 * const saddl_fns[4] = {
    NULL,                    gen_helper_sve2_saddl_h,
    gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
};
TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 0)
TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 3)
TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 2)

static gen_helper_gvec_3 * const ssubl_fns[4] = {
    NULL,                    gen_helper_sve2_ssubl_h,
    gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
};
TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 0)
TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 3)
TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 2)
TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sabdl_fns[4] = {
    NULL,                    gen_helper_sve2_sabdl_h,
    gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
};
TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 0)
TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uaddl_fns[4] = {
    NULL,                    gen_helper_sve2_uaddl_h,
    gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
};
TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 0)
TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const usubl_fns[4] = {
    NULL,                    gen_helper_sve2_usubl_h,
    gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
};
TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 0)
TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uabdl_fns[4] = {
    NULL,                    gen_helper_sve2_uabdl_h,
    gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
};
TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 0)
TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const sqdmull_fns[4] = {
    NULL,                          gen_helper_sve2_sqdmull_zzz_h,
    gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
};
TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 0)
TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const smull_fns[4] = {
    NULL,                        gen_helper_sve2_smull_zzz_h,
    gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
};
TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 0)
TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const umull_fns[4] = {
    NULL,                        gen_helper_sve2_umull_zzz_h,
    gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
};
TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 0)
TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const eoril_fns[4] = {
    gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
    gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
};
TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)

static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
        NULL,                    gen_helper_sve2_pmull_d,
    };

    if (a->esz == 0) {
        if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
            return false;
        }
        s->is_nonstreaming = true;
    } else if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
}

TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)

static gen_helper_gvec_3 * const saddw_fns[4] = {
    NULL,                    gen_helper_sve2_saddw_h,
    gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
};
TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const ssubw_fns[4] = {
    NULL,                    gen_helper_sve2_ssubw_h,
    gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
};
TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const uaddw_fns[4] = {
    NULL,                    gen_helper_sve2_uaddw_h,
    gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
};
TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const usubw_fns[4] = {
    NULL,                    gen_helper_sve2_usubw_h,
    gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
};
TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)

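/*
 * Expander for SSHLLB/T: widen the even (bottom) or odd (top)
 * half-elements and shift left by the immediate.  The value passed
 * via GVecGen2i is (shl << 1) | top, packed by do_shll_tb below.
 * For the bottom form, sign extension is done by shifting the half
 * up into the full element and arithmetic-shifting back down.
 */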
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            tcg_gen_and_vec(vece, d, n,
                            tcg_constant_vec_matching(d, vece,
                                MAKE_64BIT_MASK(halfbits, halfbits)));
        } else {
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}

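/*
 * As above for the unsigned case, but on a 64-bit lane holding
 * several elements: one shift by shl - top * halfbits moves each
 * selected half into place, and the mask clears the bits dragged
 * in from the neighbouring halves.
 */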
static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}

static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}

static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            tcg_gen_and_vec(vece, d, n,
                            tcg_constant_vec_matching(d, vece,
                                MAKE_64BIT_MASK(halfbits, halfbits)));
        } else {
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            tcg_gen_and_vec(vece, d, n,
                            tcg_constant_vec_matching(d, vece,
                                MAKE_64BIT_MASK(0, halfbits)));
        } else {
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}

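/*
 * Common expansion for SSHLLB/T and USHLLB/T: repack the shift
 * amount and the bottom/top selector into the single immediate
 * decoded by the expanders above.
 */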
static bool do_shll_tb(DisasContext *s, arg_rri_esz *a,
                       const GVecGen2i ops[3], bool sel)
{
    if (a->esz < 0 || a->esz > 2) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[a->esz]);
    }
    return true;
}

static const TCGOpcode sshll_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec, 0
};
static const GVecGen2i sshll_ops[3] = {
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_h,
      .vece = MO_16 },
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_s,
      .vece = MO_32 },
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_d,
      .vece = MO_64 }
};
TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false)
TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true)

static const TCGOpcode ushll_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, 0
};
static const GVecGen2i ushll_ops[3] = {
    { .fni8 = gen_ushll16_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_h,
      .vece = MO_16 },
    { .fni8 = gen_ushll32_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_s,
      .vece = MO_32 },
    { .fni8 = gen_ushll64_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_d,
      .vece = MO_64 },
};
TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false)
TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true)

static gen_helper_gvec_3 * const bext_fns[4] = {
    gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
    gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
};
TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bext_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bdep_fns[4] = {
    gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
    gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
};
TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bdep_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bgrp_fns[4] = {
    gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
    gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
};
TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bgrp_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const cadd_fns[4] = {
    gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
    gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
};
TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 0)
TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sqcadd_fns[4] = {
    gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
    gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
};
TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 0)
TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const sabal_fns[4] = {
    NULL,                    gen_helper_sve2_sabal_h,
    gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
};
TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const uabal_fns[4] = {
    NULL,                    gen_helper_sve2_uabal_h,
    gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
};
TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)

static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
}

TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)

TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)

TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)

static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a,
                              const GVecGen2 ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vsz, vsz, &ops[a->esz]);
    }
    return true;
}

static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

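/*
 * SQXTNB: clamp each double-width element to the signed range of
 * the half-width type, then mask so that the result occupies the
 * even (bottom) half-elements with the odd halves zeroed.
 */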
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, min));
    tcg_gen_smin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max));
    tcg_gen_and_vec(vece, d, d, tcg_constant_vec_matching(d, vece, mask));
}

static const GVecGen2 sqxtnb_ops[3] = {
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)

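/*
 * SQXTNT: as above, but deposit the result into the odd (top)
 * half-elements.  The bitsel with the low-half mask keeps the
 * existing bottom halves of the destination, hence .load_dest
 * in the ops below.
 */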
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
    tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}

static const GVecGen2 sqxtnt_ops[3] = {
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops)

static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
}

static const GVecGen2 uqxtnb_ops[3] = {
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_h,
      .vece = MO_16 },
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_s,
      .vece = MO_32 },
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)

static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;
    TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);

    tcg_gen_umin_vec(vece, n, n, maxv);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}

static const GVecGen2 uqxtnt_ops[3] = {
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops)

static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, 0));
    tcg_gen_umin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max));
}

static const GVecGen2 sqxtunb_ops[3] = {
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)

static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;
    TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);

    tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
    tcg_gen_umin_vec(vece, n, n, maxv);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}

static const GVecGen2 sqxtunt_ops[3] = {
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops)

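/*
 * Common expansion for the shift-right-and-narrow group.  The decode
 * guarantees a shift of at least 1 and at most the width of the
 * narrowed element, as asserted below.
 */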
static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a,
                          const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32) {
        return false;
    }
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}

static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask));
}

static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
static const GVecGen2i shrnb_ops[3] = {
    { .fni8 = gen_shrnb16_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnb32_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnb64_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops)

static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}

static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

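/*
 * For the MO_64 case a 64-bit lane holds exactly one element,
 * so the narrowed value can be placed with a single deposit
 * into the top 32 bits.
 */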
static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}

static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}

static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
static const GVecGen2i shrnt_ops[3] = {
    { .fni8 = gen_shrnt16_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnt32_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnt64_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops)

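/*
 * The rounding variants have no simple inline expansion,
 * so only the out-of-line helpers are provided.
 */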
static const GVecGen2i rshrnb_ops[3] = {
    { .fno = gen_helper_sve2_rshrnb_h },
    { .fno = gen_helper_sve2_rshrnb_s },
    { .fno = gen_helper_sve2_rshrnb_d },
};
TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops)

static const GVecGen2i rshrnt_ops[3] = {
    { .fno = gen_helper_sve2_rshrnt_h },
    { .fno = gen_helper_sve2_rshrnt_s },
    { .fno = gen_helper_sve2_rshrnt_d },
};
TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)

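/*
 * SQSHRUNB: arithmetic shift right, then clamp below at zero and
 * above at the unsigned half-width maximum, leaving the result in
 * the even (bottom) half-elements.
 */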
7303 static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
7304                              TCGv_vec n, int64_t shr)
7305 {
7306     int halfbits = 4 << vece;
7307     uint64_t max = MAKE_64BIT_MASK(0, halfbits);
7308 
7309     tcg_gen_sari_vec(vece, n, n, shr);
7310     tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
7311     tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
7312 }
7313 
7314 static const TCGOpcode sqshrunb_vec_list[] = {
7315     INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
7316 };
7317 static const GVecGen2i sqshrunb_ops[3] = {
7318     { .fniv = gen_sqshrunb_vec,
7319       .opt_opc = sqshrunb_vec_list,
7320       .fno = gen_helper_sve2_sqshrunb_h,
7321       .vece = MO_16 },
7322     { .fniv = gen_sqshrunb_vec,
7323       .opt_opc = sqshrunb_vec_list,
7324       .fno = gen_helper_sve2_sqshrunb_s,
7325       .vece = MO_32 },
7326     { .fniv = gen_sqshrunb_vec,
7327       .opt_opc = sqshrunb_vec_list,
7328       .fno = gen_helper_sve2_sqshrunb_d,
7329       .vece = MO_64 },
7330 };
7331 TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)
7332 
7333 static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
7334                              TCGv_vec n, int64_t shr)
7335 {
7336     int halfbits = 4 << vece;
7337     uint64_t max = MAKE_64BIT_MASK(0, halfbits);
7338     TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
7339 
7340     tcg_gen_sari_vec(vece, n, n, shr);
7341     tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
7342     tcg_gen_umin_vec(vece, n, n, maxv);
7343     tcg_gen_shli_vec(vece, n, n, halfbits);
7344     tcg_gen_bitsel_vec(vece, d, maxv, d, n);
7345 }
7346 
7347 static const TCGOpcode sqshrunt_vec_list[] = {
7348     INDEX_op_shli_vec, INDEX_op_sari_vec,
7349     INDEX_op_smax_vec, INDEX_op_umin_vec, 0
7350 };
7351 static const GVecGen2i sqshrunt_ops[3] = {
7352     { .fniv = gen_sqshrunt_vec,
7353       .opt_opc = sqshrunt_vec_list,
7354       .load_dest = true,
7355       .fno = gen_helper_sve2_sqshrunt_h,
7356       .vece = MO_16 },
7357     { .fniv = gen_sqshrunt_vec,
7358       .opt_opc = sqshrunt_vec_list,
7359       .load_dest = true,
7360       .fno = gen_helper_sve2_sqshrunt_s,
7361       .vece = MO_32 },
7362     { .fniv = gen_sqshrunt_vec,
7363       .opt_opc = sqshrunt_vec_list,
7364       .load_dest = true,
7365       .fno = gen_helper_sve2_sqshrunt_d,
7366       .vece = MO_64 },
7367 };
7368 TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops)
7369 
7370 static const GVecGen2i sqrshrunb_ops[3] = {
7371     { .fno = gen_helper_sve2_sqrshrunb_h },
7372     { .fno = gen_helper_sve2_sqrshrunb_s },
7373     { .fno = gen_helper_sve2_sqrshrunb_d },
7374 };
7375 TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops)
7376 
7377 static const GVecGen2i sqrshrunt_ops[3] = {
7378     { .fno = gen_helper_sve2_sqrshrunt_h },
7379     { .fno = gen_helper_sve2_sqrshrunt_s },
7380     { .fno = gen_helper_sve2_sqrshrunt_d },
7381 };
7382 TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)
7383 
7384 static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
7385                             TCGv_vec n, int64_t shr)
7386 {
7387     int halfbits = 4 << vece;
7388     int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
7389     int64_t min = -max - 1;
7390     int64_t mask = MAKE_64BIT_MASK(0, halfbits);
7391 
7392     tcg_gen_sari_vec(vece, n, n, shr);
7393     tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
7394     tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
7395     tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask));
7396 }
7397 
7398 static const TCGOpcode sqshrnb_vec_list[] = {
7399     INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7400 };
7401 static const GVecGen2i sqshrnb_ops[3] = {
7402     { .fniv = gen_sqshrnb_vec,
7403       .opt_opc = sqshrnb_vec_list,
7404       .fno = gen_helper_sve2_sqshrnb_h,
7405       .vece = MO_16 },
7406     { .fniv = gen_sqshrnb_vec,
7407       .opt_opc = sqshrnb_vec_list,
7408       .fno = gen_helper_sve2_sqshrnb_s,
7409       .vece = MO_32 },
7410     { .fniv = gen_sqshrnb_vec,
7411       .opt_opc = sqshrnb_vec_list,
7412       .fno = gen_helper_sve2_sqshrnb_d,
7413       .vece = MO_64 },
7414 };
7415 TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)
7416 
7417 static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
7418                              TCGv_vec n, int64_t shr)
7419 {
7420     int halfbits = 4 << vece;
7421     int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
7422     int64_t min = -max - 1;
7423     int64_t mask = MAKE_64BIT_MASK(0, halfbits);
7424 
7425     tcg_gen_sari_vec(vece, n, n, shr);
7426     tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
7427     tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
7428     tcg_gen_shli_vec(vece, n, n, halfbits);
7429     tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
7430 }
7431 
7432 static const TCGOpcode sqshrnt_vec_list[] = {
7433     INDEX_op_shli_vec, INDEX_op_sari_vec,
7434     INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7435 };
7436 static const GVecGen2i sqshrnt_ops[3] = {
7437     { .fniv = gen_sqshrnt_vec,
7438       .opt_opc = sqshrnt_vec_list,
7439       .load_dest = true,
7440       .fno = gen_helper_sve2_sqshrnt_h,
7441       .vece = MO_16 },
7442     { .fniv = gen_sqshrnt_vec,
7443       .opt_opc = sqshrnt_vec_list,
7444       .load_dest = true,
7445       .fno = gen_helper_sve2_sqshrnt_s,
7446       .vece = MO_32 },
7447     { .fniv = gen_sqshrnt_vec,
7448       .opt_opc = sqshrnt_vec_list,
7449       .load_dest = true,
7450       .fno = gen_helper_sve2_sqshrnt_d,
7451       .vece = MO_64 },
7452 };
7453 TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops)
7454 
7455 static const GVecGen2i sqrshrnb_ops[3] = {
7456     { .fno = gen_helper_sve2_sqrshrnb_h },
7457     { .fno = gen_helper_sve2_sqrshrnb_s },
7458     { .fno = gen_helper_sve2_sqrshrnb_d },
7459 };
7460 TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops)
7461 
7462 static const GVecGen2i sqrshrnt_ops[3] = {
7463     { .fno = gen_helper_sve2_sqrshrnt_h },
7464     { .fno = gen_helper_sve2_sqrshrnt_s },
7465     { .fno = gen_helper_sve2_sqrshrnt_d },
7466 };
7467 TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)
7468 
7469 static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
7470                             TCGv_vec n, int64_t shr)
7471 {
7472     int halfbits = 4 << vece;
7473     int64_t max = MAKE_64BIT_MASK(0, halfbits);
7474 
7475     tcg_gen_shri_vec(vece, n, n, shr);
7476     tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
7477 }
7478 
7479 static const TCGOpcode uqshrnb_vec_list[] = {
7480     INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7481 };
7482 static const GVecGen2i uqshrnb_ops[3] = {
7483     { .fniv = gen_uqshrnb_vec,
7484       .opt_opc = uqshrnb_vec_list,
7485       .fno = gen_helper_sve2_uqshrnb_h,
7486       .vece = MO_16 },
7487     { .fniv = gen_uqshrnb_vec,
7488       .opt_opc = uqshrnb_vec_list,
7489       .fno = gen_helper_sve2_uqshrnb_s,
7490       .vece = MO_32 },
7491     { .fniv = gen_uqshrnb_vec,
7492       .opt_opc = uqshrnb_vec_list,
7493       .fno = gen_helper_sve2_uqshrnb_d,
7494       .vece = MO_64 },
7495 };
7496 TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)
7497 
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits);
    TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_umin_vec(vece, n, n, maxv);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}

static const TCGOpcode uqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnt_ops[3] = {
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops)

static const GVecGen2i uqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnb_h },
    { .fno = gen_helper_sve2_uqrshrnb_s },
    { .fno = gen_helper_sve2_uqrshrnb_d },
};
TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops)

static const GVecGen2i uqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnt_h },
    { .fno = gen_helper_sve2_uqrshrnt_s },
    { .fno = gen_helper_sve2_uqrshrnt_d },
};
TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops)

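/*
 * Expand a narrowing add/sub-high instruction.  The function table is
 * indexed by element size; the byte entry is NULL because the
 * encoding with esz == 0 is unallocated.
 */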
#define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
    static gen_helper_gvec_3 * const name##_fns[4] = {                    \
        NULL,                       gen_helper_sve2_##name##_h,           \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz,                     \
               name##_fns[a->esz], a, 0)

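/*
 * For reference, DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb) expands to:
 *
 *   static gen_helper_gvec_3 * const addhnb_fns[4] = {
 *       NULL,                     gen_helper_sve2_addhnb_h,
 *       gen_helper_sve2_addhnb_s, gen_helper_sve2_addhnb_d,
 *   };
 *   TRANS_FEAT(ADDHNB, aa64_sve2, gen_gvec_ool_arg_zzz,
 *              addhnb_fns[a->esz], a, 0)
 */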
DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)

static gen_helper_gvec_flags_4 * const match_fns[4] = {
    gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])

static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
    gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])

static gen_helper_gvec_4 * const histcnt_fns[4] = {
    NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
};
TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
                        histcnt_fns[a->esz], a, 0)

TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
                        a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)

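/* SVE2 floating-point pairwise operations (predicated). */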
DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)

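/*
 * FMMLA operates on 2x2 matrices held in four-element segments, so
 * the vector length must cover at least one full segment: 128 bits
 * for the single-precision form, 256 bits for the double-precision
 * form.  Shorter vectors are treated as an unallocated encoding.
 */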
static bool do_fmmla(DisasContext *s, arg_rrrr_esz *a,
                     gen_helper_gvec_4_ptr *fn)
{
    if (sve_access_check(s)) {
        if (vec_full_reg_size(s) < 4 * memop_size(a->esz)) {
            unallocated_encoding(s);
        } else {
            gen_gvec_fpst_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, 0, FPST_A64);
        }
    }
    return true;
}

TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, do_fmmla, a, gen_helper_fmmla_s)
TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, do_fmmla, a, gen_helper_fmmla_d)

/*
 * SVE Integer Multiply-Add (unpredicated)
 */

static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
    NULL,                           gen_helper_sve2_sqdmlal_zzzw_h,
    gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
};
TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
    NULL,                           gen_helper_sve2_sqdmlsl_zzzw_h,
    gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
};
TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
    gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
    gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
};
TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlah_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
    gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
    gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
};
TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlsh_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
    NULL,                         gen_helper_sve2_smlal_zzzw_h,
    gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
};
TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
    NULL,                         gen_helper_sve2_umlal_zzzw_h,
    gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
};
TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
    NULL,                         gen_helper_sve2_smlsl_zzzw_h,
    gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
};
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
    NULL,                         gen_helper_sve2_umlsl_zzzw_h,
    gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
};
TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const cmla_fns[] = {
    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
};
TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const cdot_fns[] = {
    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
};
TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
};
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

TRANS_FEAT(USDOT_zzzz_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_usdot_4b, a, 0)

TRANS_FEAT(SDOT_zzzz_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_sdot_2h, a, 0)
TRANS_FEAT(UDOT_zzzz_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_udot_2h, a, 0)

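/*
 * SVE2 crypto extensions.  These reuse the out-of-line helpers shared
 * with the AdvSIMD crypto implementation.
 */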
TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
                        gen_helper_crypto_aesmc, a->rd, a->rd, 0)
TRANS_FEAT_NONSTREAMING(AESIMC, aa64_sve2_aes, gen_gvec_ool_zz,
                        gen_helper_crypto_aesimc, a->rd, a->rd, 0)

TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, 0)
TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aesd, a, 0)

TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4ekey, a, 0)

TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
                        gen_gvec_rax1, a)

TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_sh, a, 0, FPST_A64)
TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64)

TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvtnt, a, 0,
           s->fpcr_ah ? FPST_AH : FPST_A64)

TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64)
TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_sd, a, 0, FPST_A64)

TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
           FPROUNDING_ODD, gen_helper_sve_fcvt_ds)
TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
           FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds)

static gen_helper_gvec_3_ptr * const flogb_fns[] = {
    NULL,               gen_helper_flogb_h,
    gen_helper_flogb_s, gen_helper_flogb_d
};
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)

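/*
 * FMLAL/FMLSL (vectors): bit 0 of the simd data selects subtraction,
 * bit 1 selects the top (odd) half-elements of the inputs.
 */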
static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (sel << 1) | sub, tcg_env);
}

TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)

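/* Indexed form: the element index is packed above the sel/sub bits. */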
static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (a->index << 3) | (sel << 1) | sub, tcg_env);
}

TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)

TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_ummla_b, a, 0)

TRANS_FEAT(FDOT_zzzz, aa64_sme2_or_sve2p1, gen_gvec_env_arg_zzzz,
           gen_helper_sme2_fdot_h, a, 0)
TRANS_FEAT(FDOT_zzxz, aa64_sme2_or_sve2p1, gen_gvec_env_arg_zzxz,
           gen_helper_sme2_fdot_idx_h, a)

TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_env_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)

TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
                        gen_helper_gvec_bfmmla, a, 0)

static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
                              a->rd, a->rn, a->rm, a->ra, sel,
                              s->fpcr_ah ? FPST_AH : FPST_A64);
}

TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)

static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
                              a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sel,
                              s->fpcr_ah ? FPST_AH : FPST_A64);
}

TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)

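/*
 * With FPCR.AH set, BFMLSL uses a distinct helper (and FPST_AH) so
 * that the alternate floating-point behaviour is applied.
 */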
static bool do_BFMLSL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    if (s->fpcr_ah) {
        return gen_gvec_fpst_zzzz(s, gen_helper_gvec_ah_bfmlsl,
                                  a->rd, a->rn, a->rm, a->ra, sel, FPST_AH);
    } else {
        return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlsl,
                                  a->rd, a->rn, a->rm, a->ra, sel, FPST_A64);
    }
}

TRANS_FEAT(BFMLSLB_zzzw, aa64_sme2_or_sve2p1, do_BFMLSL_zzzw, a, false)
TRANS_FEAT(BFMLSLT_zzzw, aa64_sme2_or_sve2p1, do_BFMLSL_zzzw, a, true)

static bool do_BFMLSL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    if (s->fpcr_ah) {
        return gen_gvec_fpst_zzzz(s, gen_helper_gvec_ah_bfmlsl_idx,
                                  a->rd, a->rn, a->rm, a->ra,
                                  (a->index << 1) | sel, FPST_AH);
    } else {
        return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlsl_idx,
                                  a->rd, a->rn, a->rm, a->ra,
                                  (a->index << 1) | sel, FPST_A64);
    }
}

TRANS_FEAT(BFMLSLB_zzxw, aa64_sme2_or_sve2p1, do_BFMLSL_zzxw, a, false)
TRANS_FEAT(BFMLSLT_zzxw, aa64_sme2_or_sve2p1, do_BFMLSL_zzxw, a, true)

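/*
 * PSEL: if the element of Pm selected by Wv + imm (modulo the number
 * of elements) is active, copy Pn to Pd, otherwise zero Pd.
 */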
static bool trans_PSEL(DisasContext *s, arg_psel *a)
{
    int vl = vec_full_reg_size(s);
    int pl = pred_gvec_reg_size(s);
    int elements = vl >> a->esz;
    TCGv_i64 tmp, didx, dbit;
    TCGv_ptr ptr;

    if (!dc_isar_feature(aa64_sme_or_sve2p1, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    dbit = tcg_temp_new_i64();
    didx = tcg_temp_new_i64();
    ptr = tcg_temp_new_ptr();

    /* Compute the predicate element. */
    tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
    if (is_power_of_2(elements)) {
        tcg_gen_andi_i64(tmp, tmp, elements - 1);
    } else {
        tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
    }

    /* Extract the predicate byte and bit indices. */
    tcg_gen_shli_i64(tmp, tmp, a->esz);
    tcg_gen_andi_i64(dbit, tmp, 7);
    tcg_gen_shri_i64(didx, tmp, 3);
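    /*
     * Predicate bytes are addressed in host memory order; on a
     * big-endian host, flip the byte index within each 64-bit word.
     */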
    if (HOST_BIG_ENDIAN) {
        tcg_gen_xori_i64(didx, didx, 7);
    }

    /* Load the predicate word. */
    tcg_gen_trunc_i64_ptr(ptr, didx);
    tcg_gen_add_ptr(ptr, ptr, tcg_env);
    tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));

    /* Extract the predicate bit and replicate to MO_64. */
    tcg_gen_shr_i64(tmp, tmp, dbit);
    tcg_gen_andi_i64(tmp, tmp, 1);
    tcg_gen_neg_i64(tmp, tmp);

    /* Apply the mask: either copy the source predicate or write zeros. */
    pl = size_for_gvec(pl);
    tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
                      pred_full_reg_offset(s, a->pn), tmp, pl, pl);
    return true;
}

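/*
 * SCLAMP: d = MIN(MAX(a, n), m), signed: the accumulator is clamped
 * between the lower bound n and the upper bound m.
 */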
static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_smax_i32(d, a, n);
    tcg_gen_smin_i32(d, d, m);
}

static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_smax_i64(d, a, n);
    tcg_gen_smin_i64(d, d, m);
}

static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_smax_vec(vece, d, a, n);
    tcg_gen_smin_vec(vece, d, d, m);
}

static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop[] = {
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sclamp_vec,
          .fno  = gen_helper_gvec_sclamp_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fniv = gen_sclamp_vec,
          .fno  = gen_helper_gvec_sclamp_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_sclamp_i32,
          .fniv = gen_sclamp_vec,
          .fno  = gen_helper_gvec_sclamp_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_sclamp_i64,
          .fniv = gen_sclamp_vec,
          .fno  = gen_helper_gvec_sclamp_d,
          .opt_opc = vecop,
          .vece = MO_64,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}

TRANS_FEAT(SCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, gen_sclamp, a)

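/* UCLAMP: as SCLAMP, but with unsigned min/max. */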
static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_umax_i32(d, a, n);
    tcg_gen_umin_i32(d, d, m);
}

static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_umax_i64(d, a, n);
    tcg_gen_umin_i64(d, d, m);
}

static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_umax_vec(vece, d, a, n);
    tcg_gen_umin_vec(vece, d, d, m);
}

static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop[] = {
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uclamp_vec,
          .fno  = gen_helper_gvec_uclamp_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fniv = gen_uclamp_vec,
          .fno  = gen_helper_gvec_uclamp_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_uclamp_i32,
          .fniv = gen_uclamp_vec,
          .fno  = gen_helper_gvec_uclamp_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_uclamp_i64,
          .fniv = gen_uclamp_vec,
          .fno  = gen_helper_gvec_uclamp_d,
          .opt_opc = vecop,
          .vece = MO_64,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}

TRANS_FEAT(UCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, gen_uclamp, a)

static bool trans_FCLAMP(DisasContext *s, arg_FCLAMP *a)
{
    static gen_helper_gvec_3_ptr * const fn[] = {
        gen_helper_sme2_bfclamp,
        gen_helper_sme2_fclamp_h,
        gen_helper_sme2_fclamp_s,
        gen_helper_sme2_fclamp_d,
    };

    /* This insn uses MO_8 to encode BFloat16. */
    if (a->esz == MO_8
        ? !dc_isar_feature(aa64_sve_b16b16, s)
        : !dc_isar_feature(aa64_sme2_or_sve2p1, s)) {
        return false;
    }

    /* So far we do not optimize rda with MOVPRFX. */
    assert(a->rd == a->ra);
    return gen_gvec_fpst_zzz(s, fn[a->esz], a->rd, a->rn, a->rm, 1,
                             a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
}

TRANS_FEAT(SQCVTN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz,
           gen_helper_sme2_sqcvtn_sh, a->rd, a->rn, 0)
TRANS_FEAT(UQCVTN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz,
           gen_helper_sme2_uqcvtn_sh, a->rd, a->rn, 0)
TRANS_FEAT(SQCVTUN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz,
           gen_helper_sme2_sqcvtun_sh, a->rd, a->rn, 0)

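/*
 * Contiguous load/store with a predicate-as-counter (PNg) operand.
 * The helper table is indexed [is_write][big_endian][esz]; the byte
 * forms share one helper since endianness does not apply.
 */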
static bool gen_ldst_c(DisasContext *s, TCGv_i64 addr, int zd, int png,
                       MemOp esz, bool is_write, int n, bool strided)
{
    typedef void ldst_c_fn(TCGv_env, TCGv_ptr, TCGv_i64,
                           TCGv_i32, TCGv_i32);
    static ldst_c_fn * const f_ldst[2][2][4] = {
        { { gen_helper_sve2p1_ld1bb_c,
            gen_helper_sve2p1_ld1hh_le_c,
            gen_helper_sve2p1_ld1ss_le_c,
            gen_helper_sve2p1_ld1dd_le_c, },
          { gen_helper_sve2p1_ld1bb_c,
            gen_helper_sve2p1_ld1hh_be_c,
            gen_helper_sve2p1_ld1ss_be_c,
            gen_helper_sve2p1_ld1dd_be_c, } },

        { { gen_helper_sve2p1_st1bb_c,
            gen_helper_sve2p1_st1hh_le_c,
            gen_helper_sve2p1_st1ss_le_c,
            gen_helper_sve2p1_st1dd_le_c, },
          { gen_helper_sve2p1_st1bb_c,
            gen_helper_sve2p1_st1hh_be_c,
            gen_helper_sve2p1_st1ss_be_c,
            gen_helper_sve2p1_st1dd_be_c, } }
    };

    TCGv_i32 t_png, t_desc;
    TCGv_ptr t_zd;
    uint32_t desc, lg2_rstride = 0;
    bool be = s->be_data == MO_BE;

    assert(n == 2 || n == 4);
    if (strided) {
        lg2_rstride = 3;
        if (n == 4) {
            /* Validate ZD alignment. */
            if (zd & 4) {
                return false;
            }
            lg2_rstride = 2;
        }
        /* Ignore non-temporal bit */
        zd &= ~8;
    }

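    /*
     * The strided forms and the SME2-only encodings require streaming
     * mode; the plain SVE2p1 forms use the normal SVE access check.
     */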
    if ((strided || !dc_isar_feature(aa64_sve2p1, s))
        ? !sme_sm_enabled_check(s)
        : !sve_access_check(s)) {
        return true;
    }

    if (!s->mte_active[0]) {
        addr = clean_data_tbi(s, addr);
    }

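    /*
     * Assemble the helper descriptor: bit 0 distinguishes the 2- vs
     * 4-register form, bits [2:1] hold the log2 register stride, and
     * make_svemte_desc() fills in the remaining fields.
     */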
    desc = n == 2 ? 0 : 1;
    desc = desc | (lg2_rstride << 1);
    desc = make_svemte_desc(s, vec_full_reg_size(s), 1, esz, is_write, desc);
    t_desc = tcg_constant_i32(desc);

    t_png = tcg_temp_new_i32();
    tcg_gen_ld16u_i32(t_png, tcg_env,
                      pred_full_reg_offset(s, png) ^
                      (HOST_BIG_ENDIAN ? 6 : 0));

    t_zd = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd));

    f_ldst[is_write][be][esz](tcg_env, t_zd, addr, t_png, t_desc);
    return true;
}

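/* Scaled register offset: address = Xn|SP + (Xm << esz). */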
static bool gen_ldst_zcrr_c(DisasContext *s, arg_zcrr_ldst *a,
                            bool is_write, bool strided)
{
    TCGv_i64 addr = tcg_temp_new_i64();

    tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
    tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
    return gen_ldst_c(s, addr, a->rd, a->png, a->esz, is_write,
                      a->nreg, strided);
}

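/*
 * Immediate offset, scaled by the register group size:
 * address = Xn|SP + imm * nreg * VL.
 */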
static bool gen_ldst_zcri_c(DisasContext *s, arg_zcri_ldst *a,
                            bool is_write, bool strided)
{
    TCGv_i64 addr = tcg_temp_new_i64();

    tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                     a->imm * a->nreg * vec_full_reg_size(s));
    return gen_ldst_c(s, addr, a->rd, a->png, a->esz, is_write,
                      a->nreg, strided);
}

TRANS_FEAT(LD1_zcrr, aa64_sme2_or_sve2p1, gen_ldst_zcrr_c, a, false, false)
TRANS_FEAT(LD1_zcri, aa64_sme2_or_sve2p1, gen_ldst_zcri_c, a, false, false)
TRANS_FEAT(ST1_zcrr, aa64_sme2_or_sve2p1, gen_ldst_zcrr_c, a, true, false)
TRANS_FEAT(ST1_zcri, aa64_sme2_or_sve2p1, gen_ldst_zcri_c, a, true, false)

TRANS_FEAT(LD1_zcrr_stride, aa64_sme2, gen_ldst_zcrr_c, a, false, true)
TRANS_FEAT(LD1_zcri_stride, aa64_sme2, gen_ldst_zcri_c, a, false, true)
TRANS_FEAT(ST1_zcrr_stride, aa64_sme2, gen_ldst_zcrr_c, a, true, true)
TRANS_FEAT(ST1_zcri_stride, aa64_sme2, gen_ldst_zcri_c, a, true, true)