/*
 * AArch64 SME translation
 *
 * Copyright (c) 2022 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19e67cd1caSRichard Henderson
20e67cd1caSRichard Henderson #include "qemu/osdep.h"
21e67cd1caSRichard Henderson #include "translate.h"
22e67cd1caSRichard Henderson #include "translate-a64.h"
23e67cd1caSRichard Henderson
/*
 * Include the generated decoder.
 */
27e67cd1caSRichard Henderson
28e67cd1caSRichard Henderson #include "decode-sme.c.inc"
29ad939afbSRichard Henderson
30ad939afbSRichard Henderson
31e9ad3ef1SRichard Henderson /*
32e9ad3ef1SRichard Henderson * Resolve tile.size[index] to a host pointer, where tile and index
33e9ad3ef1SRichard Henderson * are always decoded together, dependent on the element size.
34e9ad3ef1SRichard Henderson */
get_tile_rowcol(DisasContext * s,int esz,int rs,int tile_index,bool vertical)35e9ad3ef1SRichard Henderson static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
36e9ad3ef1SRichard Henderson int tile_index, bool vertical)
37e9ad3ef1SRichard Henderson {
38e9ad3ef1SRichard Henderson int tile = tile_index >> (4 - esz);
39e9ad3ef1SRichard Henderson int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz);
40e9ad3ef1SRichard Henderson int pos, len, offset;
41e9ad3ef1SRichard Henderson TCGv_i32 tmp;
42e9ad3ef1SRichard Henderson TCGv_ptr addr;
43e9ad3ef1SRichard Henderson
44e9ad3ef1SRichard Henderson /* Compute the final index, which is Rs+imm. */
45e9ad3ef1SRichard Henderson tmp = tcg_temp_new_i32();
46e9ad3ef1SRichard Henderson tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs));
47e9ad3ef1SRichard Henderson tcg_gen_addi_i32(tmp, tmp, index);
48e9ad3ef1SRichard Henderson
49e9ad3ef1SRichard Henderson /* Prepare a power-of-two modulo via extraction of @len bits. */
50e9ad3ef1SRichard Henderson len = ctz32(streaming_vec_reg_size(s)) - esz;
51e9ad3ef1SRichard Henderson
5256f1c0dbSPeter Maydell if (!len) {
5356f1c0dbSPeter Maydell /*
5456f1c0dbSPeter Maydell * SVL is 128 and the element size is 128. There is exactly
5556f1c0dbSPeter Maydell * one 128x128 tile in the ZA storage, and so we calculate
5656f1c0dbSPeter Maydell * (Rs + imm) MOD 1, which is always 0. We need to special case
5756f1c0dbSPeter Maydell * this because TCG doesn't allow deposit ops with len 0.
5856f1c0dbSPeter Maydell */
5956f1c0dbSPeter Maydell tcg_gen_movi_i32(tmp, 0);
6056f1c0dbSPeter Maydell } else if (vertical) {
61e9ad3ef1SRichard Henderson /*
62e9ad3ef1SRichard Henderson * Compute the byte offset of the index within the tile:
63e9ad3ef1SRichard Henderson * (index % (svl / size)) * size
64e9ad3ef1SRichard Henderson * = (index % (svl >> esz)) << esz
65e9ad3ef1SRichard Henderson * Perform the power-of-two modulo via extraction of the low @len bits.
66e9ad3ef1SRichard Henderson * Perform the multiply by shifting left by @pos bits.
67e9ad3ef1SRichard Henderson * Perform these operations simultaneously via deposit into zero.
68e9ad3ef1SRichard Henderson */
69e9ad3ef1SRichard Henderson pos = esz;
70e9ad3ef1SRichard Henderson tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
71e9ad3ef1SRichard Henderson
72e9ad3ef1SRichard Henderson /*
73e9ad3ef1SRichard Henderson * For big-endian, adjust the indexed column byte offset within
74e9ad3ef1SRichard Henderson * the uint64_t host words that make up env->zarray[].
75e9ad3ef1SRichard Henderson */
76e9ad3ef1SRichard Henderson if (HOST_BIG_ENDIAN && esz < MO_64) {
77e9ad3ef1SRichard Henderson tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz));
78e9ad3ef1SRichard Henderson }
79e9ad3ef1SRichard Henderson } else {
80e9ad3ef1SRichard Henderson /*
81e9ad3ef1SRichard Henderson * Compute the byte offset of the index within the tile:
82e9ad3ef1SRichard Henderson * (index % (svl / size)) * (size * sizeof(row))
83e9ad3ef1SRichard Henderson * = (index % (svl >> esz)) << (esz + log2(sizeof(row)))
84e9ad3ef1SRichard Henderson */
85e9ad3ef1SRichard Henderson pos = esz + ctz32(sizeof(ARMVectorReg));
86e9ad3ef1SRichard Henderson tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
87e9ad3ef1SRichard Henderson
88e9ad3ef1SRichard Henderson /* Row slices are always aligned and need no endian adjustment. */
89e9ad3ef1SRichard Henderson }
90e9ad3ef1SRichard Henderson
91e9ad3ef1SRichard Henderson /* The tile byte offset within env->zarray is the row. */
92e9ad3ef1SRichard Henderson offset = tile * sizeof(ARMVectorReg);
93e9ad3ef1SRichard Henderson
94e9ad3ef1SRichard Henderson /* Include the byte offset of zarray to make this relative to env. */
95e9ad3ef1SRichard Henderson offset += offsetof(CPUARMState, zarray);
96e9ad3ef1SRichard Henderson tcg_gen_addi_i32(tmp, tmp, offset);
97e9ad3ef1SRichard Henderson
98e9ad3ef1SRichard Henderson /* Add the byte offset to env to produce the final pointer. */
99e9ad3ef1SRichard Henderson addr = tcg_temp_new_ptr();
100e9ad3ef1SRichard Henderson tcg_gen_ext_i32_ptr(addr, tmp);
101ad75a51eSRichard Henderson tcg_gen_add_ptr(addr, addr, tcg_env);
102e9ad3ef1SRichard Henderson
103e9ad3ef1SRichard Henderson return addr;
104e9ad3ef1SRichard Henderson }
105e9ad3ef1SRichard Henderson
1061f51573fSRichard Henderson /*
1071f51573fSRichard Henderson * Resolve tile.size[0] to a host pointer.
1081f51573fSRichard Henderson * Used by e.g. outer product insns where we require the entire tile.
1091f51573fSRichard Henderson */
get_tile(DisasContext * s,int esz,int tile)1101f51573fSRichard Henderson static TCGv_ptr get_tile(DisasContext *s, int esz, int tile)
1111f51573fSRichard Henderson {
1121f51573fSRichard Henderson TCGv_ptr addr = tcg_temp_new_ptr();
1131f51573fSRichard Henderson int offset;
1141f51573fSRichard Henderson
1151f51573fSRichard Henderson offset = tile * sizeof(ARMVectorReg) + offsetof(CPUARMState, zarray);
1161f51573fSRichard Henderson
117ad75a51eSRichard Henderson tcg_gen_addi_ptr(addr, tcg_env, offset);
1181f51573fSRichard Henderson return addr;
1191f51573fSRichard Henderson }
1201f51573fSRichard Henderson
/*
 * Translate ZERO.
 *
 * Returns false if the CPU does not implement SME.  Otherwise returns true,
 * and emits the call to the sme_zero helper (passing the instruction's
 * immediate and the streaming vector length) only when
 * sme_za_enabled_check() succeeds.
 */
static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (!sme_za_enabled_check(s)) {
        return true;
    }

    gen_helper_sme_zero(tcg_env, tcg_constant_i32(a->imm),
                        tcg_constant_i32(streaming_vec_reg_size(s)));
    return true;
}
132e9ad3ef1SRichard Henderson
/*
 * Translate MOVA between a vector register and a ZA tile slice.
 *
 * a->v selects a vertical slice, a->to_vec selects the direction
 * (tile slice -> vector register when set), a->esz indexes the helper
 * tables.  Returns false if the CPU does not implement SME; otherwise
 * true, with code emitted only when sme_smza_enabled_check() succeeds.
 */
static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
{
    /* Horizontal slices: the predicated SVE select helpers are reused. */
    static gen_helper_gvec_4 * const sel_fns[5] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d,
        gen_helper_sve_sel_zpzz_q
    };
    /* Vertical slices, vector register -> tile. */
    static gen_helper_gvec_3 * const vert_to_za_fns[5] = {
        gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h,
        gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d,
        gen_helper_sme_mova_cz_q,
    };
    /* Vertical slices, tile -> vector register. */
    static gen_helper_gvec_3 * const vert_to_vec_fns[5] = {
        gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h,
        gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d,
        gen_helper_sme_mova_zc_q,
    };

    TCGv_ptr slice, zreg, pred;
    TCGv_ptr dst, src;
    TCGv_i32 desc;
    int svl;

    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (!sme_smza_enabled_check(s)) {
        return true;
    }

    slice = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
    zreg = vec_full_reg_ptr(s, a->zr);
    pred = pred_full_reg_ptr(s, a->pg);

    svl = streaming_vec_reg_size(s);
    desc = tcg_constant_i32(simd_desc(svl, svl, 0));

    /* The direction bit decides which operand is written. */
    if (a->to_vec) {
        dst = zreg;
        src = slice;
    } else {
        dst = slice;
        src = zreg;
    }

    if (a->v) {
        /* Vertical slice -- use the sme mova helpers. */
        gen_helper_gvec_3 *fn =
            a->to_vec ? vert_to_vec_fns[a->esz] : vert_to_za_fns[a->esz];

        fn(dst, src, pred, desc);
    } else {
        /*
         * Horizontal slice -- the select helper is given the destination
         * as its second source as well as its output.
         */
        sel_fns[a->esz](dst, src, dst, pred, desc);
    }
    return true;
}
1867390e0e9SRichard Henderson
/*
 * Translate the SME LD1/ST1 family (load or store one tile slice).
 *
 * The guest address is Rn (or SP) + (Rm << esz).  Returns false if the CPU
 * does not implement SME; otherwise true, with code emitted only when
 * sme_smza_enabled_check() succeeds.
 */
static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
{
    typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32);

    /*
     * The helper table is indexed by [esz][be][v][mte][st].  Apart from
     * the load/store selector, that is also the order in which the pieces
     * appear in the helper names, which is what drives the way the macros
     * below paste the names together.
     */

#define FN_LS(F)     { gen_helper_sme_ld1##F, gen_helper_sme_st1##F }
#define FN_MTE(F)    { FN_LS(F), FN_LS(F##_mte) }
#define FN_HV(F)     { FN_MTE(F##_h), FN_MTE(F##_v) }
#define FN_END(L, B) { FN_HV(L), FN_HV(B) }

    static GenLdSt1 * const fns[5][2][2][2][2] = {
        FN_END(b, b),
        FN_END(h_le, h_be),
        FN_END(s_le, s_be),
        FN_END(d_le, d_be),
        FN_END(q_le, q_be),
    };

#undef FN_LS
#undef FN_MTE
#undef FN_HV
#undef FN_END

    bool big_endian = s->be_data == MO_BE;
    bool mte_on = s->mte_active[0];
    TCGv_ptr slice, pred;
    TCGv_i64 vaddr;
    GenLdSt1 *fn;
    uint32_t desc;

    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (!sme_smza_enabled_check(s)) {
        return true;
    }

    slice = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
    pred = pred_full_reg_ptr(s, a->pg);

    /* vaddr = Rn|SP + (Rm << esz) */
    vaddr = tcg_temp_new_i64();
    tcg_gen_shli_i64(vaddr, cpu_reg(s, a->rm), a->esz);
    tcg_gen_add_i64(vaddr, vaddr, cpu_reg_sp(s, a->rn));

    /* Only the non-MTE helpers receive an address cleaned here. */
    if (!mte_on) {
        vaddr = clean_data_tbi(s, vaddr);
    }

    desc = make_svemte_desc(s, streaming_vec_reg_size(s), 1, a->esz, a->st, 0);

    fn = fns[a->esz][big_endian][a->v][mte_on][a->st];
    fn(tcg_env, slice, pred, vaddr, tcg_constant_i32(desc));
    return true;
}
2454c46a5f1SRichard Henderson
2464c46a5f1SRichard Henderson typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int);
2474c46a5f1SRichard Henderson
do_ldst_r(DisasContext * s,arg_ldstr * a,GenLdStR * fn)2484c46a5f1SRichard Henderson static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
2494c46a5f1SRichard Henderson {
2504c46a5f1SRichard Henderson int svl = streaming_vec_reg_size(s);
2514c46a5f1SRichard Henderson int imm = a->imm;
2524c46a5f1SRichard Henderson TCGv_ptr base;
2534c46a5f1SRichard Henderson
2544c46a5f1SRichard Henderson if (!sme_za_enabled_check(s)) {
2554c46a5f1SRichard Henderson return true;
2564c46a5f1SRichard Henderson }
2574c46a5f1SRichard Henderson
2584c46a5f1SRichard Henderson /* ZA[n] equates to ZA0H.B[n]. */
2594c46a5f1SRichard Henderson base = get_tile_rowcol(s, MO_8, a->rv, imm, false);
2604c46a5f1SRichard Henderson
2614c46a5f1SRichard Henderson fn(s, base, 0, svl, a->rn, imm * svl);
2624c46a5f1SRichard Henderson return true;
2634c46a5f1SRichard Henderson }
2644c46a5f1SRichard Henderson
TRANS_FEAT(LDR,aa64_sme,do_ldst_r,a,gen_sve_ldr)2654c46a5f1SRichard Henderson TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
2664c46a5f1SRichard Henderson TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
267bc4420d9SRichard Henderson
268bc4420d9SRichard Henderson static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
269bc4420d9SRichard Henderson gen_helper_gvec_4 *fn)
270bc4420d9SRichard Henderson {
271bc4420d9SRichard Henderson int svl = streaming_vec_reg_size(s);
272bc4420d9SRichard Henderson uint32_t desc = simd_desc(svl, svl, 0);
273bc4420d9SRichard Henderson TCGv_ptr za, zn, pn, pm;
274bc4420d9SRichard Henderson
275bc4420d9SRichard Henderson if (!sme_smza_enabled_check(s)) {
276bc4420d9SRichard Henderson return true;
277bc4420d9SRichard Henderson }
278bc4420d9SRichard Henderson
2791f51573fSRichard Henderson za = get_tile(s, esz, a->zad);
280bc4420d9SRichard Henderson zn = vec_full_reg_ptr(s, a->zn);
281bc4420d9SRichard Henderson pn = pred_full_reg_ptr(s, a->pn);
282bc4420d9SRichard Henderson pm = pred_full_reg_ptr(s, a->pm);
283bc4420d9SRichard Henderson
284bc4420d9SRichard Henderson fn(za, zn, pn, pm, tcg_constant_i32(desc));
285bc4420d9SRichard Henderson return true;
286bc4420d9SRichard Henderson }
287bc4420d9SRichard Henderson
TRANS_FEAT(ADDHA_s,aa64_sme,do_adda,a,MO_32,gen_helper_sme_addha_s)288bc4420d9SRichard Henderson TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
289bc4420d9SRichard Henderson TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
290bc4420d9SRichard Henderson TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
291bc4420d9SRichard Henderson TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
292558e956cSRichard Henderson
293920f640dSRichard Henderson static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
294920f640dSRichard Henderson gen_helper_gvec_5 *fn)
295920f640dSRichard Henderson {
296920f640dSRichard Henderson int svl = streaming_vec_reg_size(s);
297920f640dSRichard Henderson uint32_t desc = simd_desc(svl, svl, a->sub);
298920f640dSRichard Henderson TCGv_ptr za, zn, zm, pn, pm;
299920f640dSRichard Henderson
300920f640dSRichard Henderson if (!sme_smza_enabled_check(s)) {
301920f640dSRichard Henderson return true;
302920f640dSRichard Henderson }
303920f640dSRichard Henderson
3041f51573fSRichard Henderson za = get_tile(s, esz, a->zad);
305920f640dSRichard Henderson zn = vec_full_reg_ptr(s, a->zn);
306920f640dSRichard Henderson zm = vec_full_reg_ptr(s, a->zm);
307920f640dSRichard Henderson pn = pred_full_reg_ptr(s, a->pn);
308920f640dSRichard Henderson pm = pred_full_reg_ptr(s, a->pm);
309920f640dSRichard Henderson
310920f640dSRichard Henderson fn(za, zn, zm, pn, pm, tcg_constant_i32(desc));
311920f640dSRichard Henderson return true;
312920f640dSRichard Henderson }
313920f640dSRichard Henderson
/*
 * Common body of the outer-product translators whose helper takes a
 * float-status pointer.
 *
 * Works like the plain outer-product path, but additionally passes
 * fpstatus_ptr(@e_fpst) to @fn ahead of the descriptor; a->sub travels in
 * the data field of the descriptor.  Always returns true; the helper call
 * is emitted only when sme_smza_enabled_check() succeeds.
 */
static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
                            ARMFPStatusFlavour e_fpst,
                            gen_helper_gvec_5_ptr *fn)
{
    if (sme_smza_enabled_check(s)) {
        int svl = streaming_vec_reg_size(s);
        TCGv_ptr tile = get_tile(s, esz, a->zad);
        TCGv_ptr zn = vec_full_reg_ptr(s, a->zn);
        TCGv_ptr zm = vec_full_reg_ptr(s, a->zm);
        TCGv_ptr pn = pred_full_reg_ptr(s, a->pn);
        TCGv_ptr pm = pred_full_reg_ptr(s, a->pm);
        TCGv_ptr status = fpstatus_ptr(e_fpst);

        fn(tile, zn, zm, pn, pm, status,
           tcg_constant_i32(simd_desc(svl, svl, a->sub)));
    }
    return true;
}
336558e956cSRichard Henderson
/*
 * Common body of the outer-product translators whose helper takes the
 * CPU env pointer.
 *
 * Works like the plain outer-product path, but additionally passes
 * tcg_env to @fn ahead of the descriptor; a->sub travels in the data
 * field of the descriptor.  Always returns true; the helper call is
 * emitted only when sme_smza_enabled_check() succeeds.
 */
static bool do_outprod_env(DisasContext *s, arg_op *a, MemOp esz,
                           gen_helper_gvec_5_ptr *fn)
{
    if (sme_smza_enabled_check(s)) {
        int svl = streaming_vec_reg_size(s);
        TCGv_ptr tile = get_tile(s, esz, a->zad);
        TCGv_ptr zn = vec_full_reg_ptr(s, a->zn);
        TCGv_ptr zm = vec_full_reg_ptr(s, a->zm);
        TCGv_ptr pn = pred_full_reg_ptr(s, a->pn);
        TCGv_ptr pm = pred_full_reg_ptr(s, a->pm);

        fn(tile, zn, zm, pn, pm, tcg_env,
           tcg_constant_i32(simd_desc(svl, svl, a->sub)));
    }
    return true;
}
35755f9f4eeSPeter Maydell
35855f9f4eeSPeter Maydell TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_env, a,
35955f9f4eeSPeter Maydell MO_32, gen_helper_sme_fmopa_h)
360207d30b5SRichard Henderson TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a,
361*e107a7a5SPeter Maydell MO_32, FPST_A64, gen_helper_sme_fmopa_s)
362207d30b5SRichard Henderson TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a,
363*e107a7a5SPeter Maydell MO_64, FPST_A64, gen_helper_sme_fmopa_d)
364920f640dSRichard Henderson
365ecabcfa4SPeter Maydell TRANS_FEAT(BFMOPA, aa64_sme, do_outprod_env, a, MO_32, gen_helper_sme_bfmopa)
36623a5e385SRichard Henderson
36723a5e385SRichard Henderson TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
36823a5e385SRichard Henderson TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
36923a5e385SRichard Henderson TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s)
37023a5e385SRichard Henderson TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s)
37123a5e385SRichard Henderson
37223a5e385SRichard Henderson TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d)
37323a5e385SRichard Henderson TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d)
37423a5e385SRichard Henderson TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d)
37523a5e385SRichard Henderson TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d)
376