/*
 * AArch64 generic vector expansion
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"
#include "translate-a64.h"


static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    tcg_gen_rotli_i64(d, m, 1);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
{
    tcg_gen_rotli_vec(vece, d, m, 1);
    tcg_gen_xor_vec(vece, d, d, n);
}

void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3 op = {
        .fni8 = gen_rax1_i64,
        .fniv = gen_rax1_vec,
        .opt_opc = vecop_list,
        .fno = gen_helper_crypto_rax1,
        .vece = MO_64,
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
}

static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
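    /*
     * Recombine the halves to finish the per-byte rotate right:
     * d supplies each result byte's low (8 - sh) bits and t the high
     * sh bits that wrapped around.  E.g. for sh = 3, mask is 0x1f in
     * every byte, so d gives bits 0..4 and t bits 5..7 of each byte.
     */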
    tcg_gen_or_i64(d, d, t);
}

static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}

void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
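    /*
     * A rotation by esize (reachable only via the SVE2 encoding) is
     * the identity, so reducing the shift modulo esize folds that
     * case onto the plain-xor fast path below.
     */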
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}

static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                   uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

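/*
 * BCAX: bit clear and exclusive-OR, d = n ^ (m & ~k).  Like EOR3,
 * it comes from FEAT_SHA3 and is shared with SVE2.
 */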
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                   uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

/*
 * Set @res to the correctly saturated result.
 * Set @qc non-zero if saturation occurred.
 */
void gen_suqadd_bhs(TCGv_i64 res, TCGv_i64 qc,
                    TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 max = tcg_constant_i64((1ull << ((8 << esz) - 1)) - 1);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_add_i64(t, a, b);
    tcg_gen_smin_i64(res, t, max);
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}

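/*
 * The b/h/s case above needs only the upper clamp: the addend is
 * unsigned, so the sum can only exceed the signed maximum.  For the
 * 64-bit element the sum may also wrap the i64 type, so instead of
 * widening we clamp the addend first: umin(INT64_MAX - a, b) is the
 * largest value that can be added to @a without signed overflow,
 * computed in unsigned arithmetic so that it is also correct for
 * negative @a.  The addend was clamped iff t != b, which the final
 * xor/or records in @qc.
 */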
void gen_suqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 max = tcg_constant_i64(INT64_MAX);
    TCGv_i64 t = tcg_temp_new_i64();

    /* Maximum value that can be added to @a without overflow. */
    tcg_gen_sub_i64(t, max, a);

    /* Constrain addend so that the next addition never overflows. */
    tcg_gen_umin_i64(t, t, b);
    tcg_gen_add_i64(res, a, t);

    tcg_gen_xor_i64(t, t, b);
    tcg_gen_or_i64(qc, qc, t);
}

static void gen_suqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                           TCGv_vec a, TCGv_vec b)
{
    TCGv_vec max =
        tcg_constant_vec_matching(t, vece, (1ull << ((8 << vece) - 1)) - 1);
    TCGv_vec u = tcg_temp_new_vec_matching(t);

    /* Maximum value that can be added to @a without overflow. */
    tcg_gen_sub_vec(vece, u, max, a);

    /* Constrain addend so that the next addition never overflows. */
    tcg_gen_umin_vec(vece, u, u, b);
    tcg_gen_add_vec(vece, t, u, a);

    /* Compute QC by comparing the adjusted addend against the original @b. */
    tcg_gen_xor_vec(vece, u, u, b);
    tcg_gen_or_vec(vece, qc, qc, u);
}

void gen_gvec_suqadd_qc(unsigned vece, uint32_t rd_ofs,
                        uint32_t rn_ofs, uint32_t rm_ofs,
                        uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_add_vec, INDEX_op_sub_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_suqadd_vec,
          .fno = gen_helper_gvec_suqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_suqadd_vec,
          .fno = gen_helper_gvec_suqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_suqadd_vec,
          .fno = gen_helper_gvec_suqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_suqadd_vec,
          .fni8 = gen_suqadd_d,
          .fno = gen_helper_gvec_suqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

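/*
 * USQADD: unsigned saturating accumulate of a signed value.  The signed
 * addend can push the sum out of range in either direction, so clamp it
 * to [0, 2**esize - 1] with both smin and smax.
 */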
void gen_usqadd_bhs(TCGv_i64 res, TCGv_i64 qc,
                    TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 max = tcg_constant_i64(MAKE_64BIT_MASK(0, 8 << esz));
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    tcg_gen_smin_i64(res, tmp, max);
    tcg_gen_smax_i64(res, res, zero);
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

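/*
 * As with SUQADD, the 64-bit element cannot simply widen, so handle
 * the two signs of @b separately: for b >= 0 saturate to UINT64_MAX
 * on unsigned overflow of a + b; for b < 0 saturate to 0 when a < -b.
 * A final movcond on the sign of @b selects the live result.
 */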
void gen_usqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 tmp = tcg_temp_new_i64();
    TCGv_i64 tneg = tcg_temp_new_i64();
    TCGv_i64 tpos = tcg_temp_new_i64();
    TCGv_i64 max = tcg_constant_i64(UINT64_MAX);
    TCGv_i64 zero = tcg_constant_i64(0);

    tcg_gen_add_i64(tmp, a, b);

    /* If @b is positive, saturate if (a + b) < a, aka unsigned overflow. */
    tcg_gen_movcond_i64(TCG_COND_LTU, tpos, tmp, a, max, tmp);

    /* If @b is negative, saturate if a < -b, i.e. the subtraction is negative. */
    tcg_gen_neg_i64(tneg, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, tneg, a, tneg, zero, tmp);

    /* Select correct result from sign of @b. */
    tcg_gen_movcond_i64(TCG_COND_LT, res, b, zero, tneg, tpos);
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

static void gen_usqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                           TCGv_vec a, TCGv_vec b)
{
    TCGv_vec u = tcg_temp_new_vec_matching(t);
    TCGv_vec z = tcg_constant_vec_matching(t, vece, 0);

    /* Compute unsigned saturation of add for +b and sub for -b. */
    tcg_gen_neg_vec(vece, t, b);
    tcg_gen_usadd_vec(vece, u, a, b);
    tcg_gen_ussub_vec(vece, t, a, t);

    /* Select the correct result depending on the sign of b. */
    tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, b, z, t, u);

    /* Compute QC by comparing against the non-saturated result. */
    tcg_gen_add_vec(vece, u, a, b);
    tcg_gen_xor_vec(vece, u, u, t);
    tcg_gen_or_vec(vece, qc, qc, u);
}

void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs,
                        uint32_t rn_ofs, uint32_t rm_ofs,
                        uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_add_vec,
        INDEX_op_usadd_vec, INDEX_op_ussub_vec,
        INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_usqadd_vec,
          .fno = gen_helper_gvec_usqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_usqadd_vec,
          .fno = gen_helper_gvec_usqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_usqadd_vec,
          .fno = gen_helper_gvec_usqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_usqadd_vec,
          .fni8 = gen_usqadd_d,
          .fno = gen_helper_gvec_usqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}