109a52d85SRichard Henderson /* 209a52d85SRichard Henderson * ARM generic vector expansion 309a52d85SRichard Henderson * 409a52d85SRichard Henderson * Copyright (c) 2003 Fabrice Bellard 509a52d85SRichard Henderson * Copyright (c) 2005-2007 CodeSourcery 609a52d85SRichard Henderson * Copyright (c) 2007 OpenedHand, Ltd. 709a52d85SRichard Henderson * 809a52d85SRichard Henderson * This library is free software; you can redistribute it and/or 909a52d85SRichard Henderson * modify it under the terms of the GNU Lesser General Public 1009a52d85SRichard Henderson * License as published by the Free Software Foundation; either 1109a52d85SRichard Henderson * version 2.1 of the License, or (at your option) any later version. 1209a52d85SRichard Henderson * 1309a52d85SRichard Henderson * This library is distributed in the hope that it will be useful, 1409a52d85SRichard Henderson * but WITHOUT ANY WARRANTY; without even the implied warranty of 1509a52d85SRichard Henderson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1609a52d85SRichard Henderson * Lesser General Public License for more details. 1709a52d85SRichard Henderson * 1809a52d85SRichard Henderson * You should have received a copy of the GNU Lesser General Public 1909a52d85SRichard Henderson * License along with this library; if not, see <http://www.gnu.org/licenses/>. 2009a52d85SRichard Henderson */ 2109a52d85SRichard Henderson 2209a52d85SRichard Henderson #include "qemu/osdep.h" 2309a52d85SRichard Henderson #include "translate.h" 2409a52d85SRichard Henderson 2509a52d85SRichard Henderson 2609a52d85SRichard Henderson static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, 2709a52d85SRichard Henderson uint32_t opr_sz, uint32_t max_sz, 2809a52d85SRichard Henderson gen_helper_gvec_3_ptr *fn) 2909a52d85SRichard Henderson { 3009a52d85SRichard Henderson TCGv_ptr qc_ptr = tcg_temp_new_ptr(); 3109a52d85SRichard Henderson 3201d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 3309a52d85SRichard Henderson tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); 3409a52d85SRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr, 3509a52d85SRichard Henderson opr_sz, max_sz, 0, fn); 3609a52d85SRichard Henderson } 3709a52d85SRichard Henderson 38*8f81dcedSRichard Henderson void gen_gvec_sqdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 39*8f81dcedSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 40*8f81dcedSRichard Henderson { 41*8f81dcedSRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 42*8f81dcedSRichard Henderson gen_helper_neon_sqdmulh_h, gen_helper_neon_sqdmulh_s 43*8f81dcedSRichard Henderson }; 44*8f81dcedSRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 45*8f81dcedSRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 46*8f81dcedSRichard Henderson } 47*8f81dcedSRichard Henderson 48*8f81dcedSRichard Henderson void gen_gvec_sqrdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 49*8f81dcedSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 50*8f81dcedSRichard Henderson { 51*8f81dcedSRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 52*8f81dcedSRichard Henderson gen_helper_neon_sqrdmulh_h, gen_helper_neon_sqrdmulh_s 53*8f81dcedSRichard Henderson }; 54*8f81dcedSRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 55*8f81dcedSRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 56*8f81dcedSRichard Henderson } 57*8f81dcedSRichard Henderson 5809a52d85SRichard Henderson void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 5909a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 6009a52d85SRichard Henderson { 6109a52d85SRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 6209a52d85SRichard Henderson gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32 6309a52d85SRichard Henderson }; 6409a52d85SRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 6509a52d85SRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 6609a52d85SRichard Henderson } 6709a52d85SRichard Henderson 6809a52d85SRichard Henderson void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 6909a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 7009a52d85SRichard Henderson { 7109a52d85SRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 7209a52d85SRichard Henderson gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32 7309a52d85SRichard Henderson }; 7409a52d85SRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 7509a52d85SRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 7609a52d85SRichard Henderson } 7709a52d85SRichard Henderson 7809a52d85SRichard Henderson #define GEN_CMP0(NAME, COND) \ 7909a52d85SRichard Henderson void NAME(unsigned vece, uint32_t d, uint32_t m, \ 8009a52d85SRichard Henderson uint32_t opr_sz, uint32_t max_sz) \ 8109a52d85SRichard Henderson { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); } 8209a52d85SRichard Henderson 8309a52d85SRichard Henderson GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ) 8409a52d85SRichard Henderson GEN_CMP0(gen_gvec_cle0, TCG_COND_LE) 8509a52d85SRichard Henderson GEN_CMP0(gen_gvec_cge0, TCG_COND_GE) 8609a52d85SRichard Henderson GEN_CMP0(gen_gvec_clt0, TCG_COND_LT) 8709a52d85SRichard Henderson GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT) 8809a52d85SRichard Henderson 8909a52d85SRichard Henderson #undef GEN_CMP0 9009a52d85SRichard Henderson 9109a52d85SRichard Henderson static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 9209a52d85SRichard Henderson { 9309a52d85SRichard Henderson tcg_gen_vec_sar8i_i64(a, a, shift); 9409a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, a); 9509a52d85SRichard Henderson } 9609a52d85SRichard Henderson 9709a52d85SRichard Henderson static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 9809a52d85SRichard Henderson { 9909a52d85SRichard Henderson tcg_gen_vec_sar16i_i64(a, a, shift); 10009a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, a); 10109a52d85SRichard Henderson } 10209a52d85SRichard Henderson 10309a52d85SRichard Henderson static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 10409a52d85SRichard Henderson { 10509a52d85SRichard Henderson tcg_gen_sari_i32(a, a, shift); 10609a52d85SRichard Henderson tcg_gen_add_i32(d, d, a); 10709a52d85SRichard Henderson } 10809a52d85SRichard Henderson 10909a52d85SRichard Henderson static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 11009a52d85SRichard Henderson { 11109a52d85SRichard Henderson tcg_gen_sari_i64(a, a, shift); 11209a52d85SRichard Henderson tcg_gen_add_i64(d, d, a); 11309a52d85SRichard Henderson } 11409a52d85SRichard Henderson 11509a52d85SRichard Henderson static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 11609a52d85SRichard Henderson { 11709a52d85SRichard Henderson tcg_gen_sari_vec(vece, a, a, sh); 11809a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, a); 11909a52d85SRichard Henderson } 12009a52d85SRichard Henderson 12109a52d85SRichard Henderson void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 12209a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 12309a52d85SRichard Henderson { 12409a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 12509a52d85SRichard Henderson INDEX_op_sari_vec, INDEX_op_add_vec, 0 12609a52d85SRichard Henderson }; 12709a52d85SRichard Henderson static const GVecGen2i ops[4] = { 12809a52d85SRichard Henderson { .fni8 = gen_ssra8_i64, 12909a52d85SRichard Henderson .fniv = gen_ssra_vec, 13009a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_b, 13109a52d85SRichard Henderson .load_dest = true, 13209a52d85SRichard Henderson .opt_opc = vecop_list, 13309a52d85SRichard Henderson .vece = MO_8 }, 13409a52d85SRichard Henderson { .fni8 = gen_ssra16_i64, 13509a52d85SRichard Henderson .fniv = gen_ssra_vec, 13609a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_h, 13709a52d85SRichard Henderson .load_dest = true, 13809a52d85SRichard Henderson .opt_opc = vecop_list, 13909a52d85SRichard Henderson .vece = MO_16 }, 14009a52d85SRichard Henderson { .fni4 = gen_ssra32_i32, 14109a52d85SRichard Henderson .fniv = gen_ssra_vec, 14209a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_s, 14309a52d85SRichard Henderson .load_dest = true, 14409a52d85SRichard Henderson .opt_opc = vecop_list, 14509a52d85SRichard Henderson .vece = MO_32 }, 14609a52d85SRichard Henderson { .fni8 = gen_ssra64_i64, 14709a52d85SRichard Henderson .fniv = gen_ssra_vec, 14809a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_d, 14909a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 15009a52d85SRichard Henderson .opt_opc = vecop_list, 15109a52d85SRichard Henderson .load_dest = true, 15209a52d85SRichard Henderson .vece = MO_64 }, 15309a52d85SRichard Henderson }; 15409a52d85SRichard Henderson 15509a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize]. */ 15609a52d85SRichard Henderson tcg_debug_assert(shift > 0); 15709a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 15809a52d85SRichard Henderson 15909a52d85SRichard Henderson /* 16009a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 16109a52d85SRichard Henderson * Signed results in all sign bits. 16209a52d85SRichard Henderson */ 16309a52d85SRichard Henderson shift = MIN(shift, (8 << vece) - 1); 16409a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 16509a52d85SRichard Henderson } 16609a52d85SRichard Henderson 16709a52d85SRichard Henderson static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 16809a52d85SRichard Henderson { 16909a52d85SRichard Henderson tcg_gen_vec_shr8i_i64(a, a, shift); 17009a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, a); 17109a52d85SRichard Henderson } 17209a52d85SRichard Henderson 17309a52d85SRichard Henderson static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 17409a52d85SRichard Henderson { 17509a52d85SRichard Henderson tcg_gen_vec_shr16i_i64(a, a, shift); 17609a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, a); 17709a52d85SRichard Henderson } 17809a52d85SRichard Henderson 17909a52d85SRichard Henderson static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 18009a52d85SRichard Henderson { 18109a52d85SRichard Henderson tcg_gen_shri_i32(a, a, shift); 18209a52d85SRichard Henderson tcg_gen_add_i32(d, d, a); 18309a52d85SRichard Henderson } 18409a52d85SRichard Henderson 18509a52d85SRichard Henderson static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 18609a52d85SRichard Henderson { 18709a52d85SRichard Henderson tcg_gen_shri_i64(a, a, shift); 18809a52d85SRichard Henderson tcg_gen_add_i64(d, d, a); 18909a52d85SRichard Henderson } 19009a52d85SRichard Henderson 19109a52d85SRichard Henderson static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 19209a52d85SRichard Henderson { 19309a52d85SRichard Henderson tcg_gen_shri_vec(vece, a, a, sh); 19409a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, a); 19509a52d85SRichard Henderson } 19609a52d85SRichard Henderson 19709a52d85SRichard Henderson void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 19809a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 19909a52d85SRichard Henderson { 20009a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 20109a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_add_vec, 0 20209a52d85SRichard Henderson }; 20309a52d85SRichard Henderson static const GVecGen2i ops[4] = { 20409a52d85SRichard Henderson { .fni8 = gen_usra8_i64, 20509a52d85SRichard Henderson .fniv = gen_usra_vec, 20609a52d85SRichard Henderson .fno = gen_helper_gvec_usra_b, 20709a52d85SRichard Henderson .load_dest = true, 20809a52d85SRichard Henderson .opt_opc = vecop_list, 20909a52d85SRichard Henderson .vece = MO_8, }, 21009a52d85SRichard Henderson { .fni8 = gen_usra16_i64, 21109a52d85SRichard Henderson .fniv = gen_usra_vec, 21209a52d85SRichard Henderson .fno = gen_helper_gvec_usra_h, 21309a52d85SRichard Henderson .load_dest = true, 21409a52d85SRichard Henderson .opt_opc = vecop_list, 21509a52d85SRichard Henderson .vece = MO_16, }, 21609a52d85SRichard Henderson { .fni4 = gen_usra32_i32, 21709a52d85SRichard Henderson .fniv = gen_usra_vec, 21809a52d85SRichard Henderson .fno = gen_helper_gvec_usra_s, 21909a52d85SRichard Henderson .load_dest = true, 22009a52d85SRichard Henderson .opt_opc = vecop_list, 22109a52d85SRichard Henderson .vece = MO_32, }, 22209a52d85SRichard Henderson { .fni8 = gen_usra64_i64, 22309a52d85SRichard Henderson .fniv = gen_usra_vec, 22409a52d85SRichard Henderson .fno = gen_helper_gvec_usra_d, 22509a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 22609a52d85SRichard Henderson .load_dest = true, 22709a52d85SRichard Henderson .opt_opc = vecop_list, 22809a52d85SRichard Henderson .vece = MO_64, }, 22909a52d85SRichard Henderson }; 23009a52d85SRichard Henderson 23109a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize]. */ 23209a52d85SRichard Henderson tcg_debug_assert(shift > 0); 23309a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 23409a52d85SRichard Henderson 23509a52d85SRichard Henderson /* 23609a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 23709a52d85SRichard Henderson * Unsigned results in all zeros as input to accumulate: nop. 23809a52d85SRichard Henderson */ 23909a52d85SRichard Henderson if (shift < (8 << vece)) { 24009a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 24109a52d85SRichard Henderson } else { 24209a52d85SRichard Henderson /* Nop, but we do need to clear the tail. */ 24309a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 24409a52d85SRichard Henderson } 24509a52d85SRichard Henderson } 24609a52d85SRichard Henderson 24709a52d85SRichard Henderson /* 24809a52d85SRichard Henderson * Shift one less than the requested amount, and the low bit is 24909a52d85SRichard Henderson * the rounding bit. For the 8 and 16-bit operations, because we 25009a52d85SRichard Henderson * mask the low bit, we can perform a normal integer shift instead 25109a52d85SRichard Henderson * of a vector shift. 25209a52d85SRichard Henderson */ 25309a52d85SRichard Henderson static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 25409a52d85SRichard Henderson { 25509a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 25609a52d85SRichard Henderson 25709a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 25809a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 25909a52d85SRichard Henderson tcg_gen_vec_sar8i_i64(d, a, sh); 26009a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 26109a52d85SRichard Henderson } 26209a52d85SRichard Henderson 26309a52d85SRichard Henderson static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 26409a52d85SRichard Henderson { 26509a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 26609a52d85SRichard Henderson 26709a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 26809a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 26909a52d85SRichard Henderson tcg_gen_vec_sar16i_i64(d, a, sh); 27009a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 27109a52d85SRichard Henderson } 27209a52d85SRichard Henderson 27309a52d85SRichard Henderson void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 27409a52d85SRichard Henderson { 27509a52d85SRichard Henderson TCGv_i32 t; 27609a52d85SRichard Henderson 27709a52d85SRichard Henderson /* Handle shift by the input size for the benefit of trans_SRSHR_ri */ 27809a52d85SRichard Henderson if (sh == 32) { 27909a52d85SRichard Henderson tcg_gen_movi_i32(d, 0); 28009a52d85SRichard Henderson return; 28109a52d85SRichard Henderson } 28209a52d85SRichard Henderson t = tcg_temp_new_i32(); 28309a52d85SRichard Henderson tcg_gen_extract_i32(t, a, sh - 1, 1); 28409a52d85SRichard Henderson tcg_gen_sari_i32(d, a, sh); 28509a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 28609a52d85SRichard Henderson } 28709a52d85SRichard Henderson 28809a52d85SRichard Henderson void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 28909a52d85SRichard Henderson { 29009a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 29109a52d85SRichard Henderson 29209a52d85SRichard Henderson tcg_gen_extract_i64(t, a, sh - 1, 1); 29309a52d85SRichard Henderson tcg_gen_sari_i64(d, a, sh); 29409a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 29509a52d85SRichard Henderson } 29609a52d85SRichard Henderson 29709a52d85SRichard Henderson static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 29809a52d85SRichard Henderson { 29909a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 30009a52d85SRichard Henderson TCGv_vec ones = tcg_temp_new_vec_matching(d); 30109a52d85SRichard Henderson 30209a52d85SRichard Henderson tcg_gen_shri_vec(vece, t, a, sh - 1); 30309a52d85SRichard Henderson tcg_gen_dupi_vec(vece, ones, 1); 30409a52d85SRichard Henderson tcg_gen_and_vec(vece, t, t, ones); 30509a52d85SRichard Henderson tcg_gen_sari_vec(vece, d, a, sh); 30609a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 30709a52d85SRichard Henderson } 30809a52d85SRichard Henderson 30909a52d85SRichard Henderson void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 31009a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 31109a52d85SRichard Henderson { 31209a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 31309a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 31409a52d85SRichard Henderson }; 31509a52d85SRichard Henderson static const GVecGen2i ops[4] = { 31609a52d85SRichard Henderson { .fni8 = gen_srshr8_i64, 31709a52d85SRichard Henderson .fniv = gen_srshr_vec, 31809a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_b, 31909a52d85SRichard Henderson .opt_opc = vecop_list, 32009a52d85SRichard Henderson .vece = MO_8 }, 32109a52d85SRichard Henderson { .fni8 = gen_srshr16_i64, 32209a52d85SRichard Henderson .fniv = gen_srshr_vec, 32309a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_h, 32409a52d85SRichard Henderson .opt_opc = vecop_list, 32509a52d85SRichard Henderson .vece = MO_16 }, 32609a52d85SRichard Henderson { .fni4 = gen_srshr32_i32, 32709a52d85SRichard Henderson .fniv = gen_srshr_vec, 32809a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_s, 32909a52d85SRichard Henderson .opt_opc = vecop_list, 33009a52d85SRichard Henderson .vece = MO_32 }, 33109a52d85SRichard Henderson { .fni8 = gen_srshr64_i64, 33209a52d85SRichard Henderson .fniv = gen_srshr_vec, 33309a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_d, 33409a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 33509a52d85SRichard Henderson .opt_opc = vecop_list, 33609a52d85SRichard Henderson .vece = MO_64 }, 33709a52d85SRichard Henderson }; 33809a52d85SRichard Henderson 33909a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 34009a52d85SRichard Henderson tcg_debug_assert(shift > 0); 34109a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 34209a52d85SRichard Henderson 34309a52d85SRichard Henderson if (shift == (8 << vece)) { 34409a52d85SRichard Henderson /* 34509a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 34609a52d85SRichard Henderson * Signed results in all sign bits. With rounding, this produces 34709a52d85SRichard Henderson * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. 34809a52d85SRichard Henderson * I.e. always zero. 34909a52d85SRichard Henderson */ 35009a52d85SRichard Henderson tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0); 35109a52d85SRichard Henderson } else { 35209a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 35309a52d85SRichard Henderson } 35409a52d85SRichard Henderson } 35509a52d85SRichard Henderson 35609a52d85SRichard Henderson static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 35709a52d85SRichard Henderson { 35809a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 35909a52d85SRichard Henderson 36009a52d85SRichard Henderson gen_srshr8_i64(t, a, sh); 36109a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 36209a52d85SRichard Henderson } 36309a52d85SRichard Henderson 36409a52d85SRichard Henderson static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 36509a52d85SRichard Henderson { 36609a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 36709a52d85SRichard Henderson 36809a52d85SRichard Henderson gen_srshr16_i64(t, a, sh); 36909a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 37009a52d85SRichard Henderson } 37109a52d85SRichard Henderson 37209a52d85SRichard Henderson static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 37309a52d85SRichard Henderson { 37409a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 37509a52d85SRichard Henderson 37609a52d85SRichard Henderson gen_srshr32_i32(t, a, sh); 37709a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 37809a52d85SRichard Henderson } 37909a52d85SRichard Henderson 38009a52d85SRichard Henderson static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 38109a52d85SRichard Henderson { 38209a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 38309a52d85SRichard Henderson 38409a52d85SRichard Henderson gen_srshr64_i64(t, a, sh); 38509a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 38609a52d85SRichard Henderson } 38709a52d85SRichard Henderson 38809a52d85SRichard Henderson static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 38909a52d85SRichard Henderson { 39009a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 39109a52d85SRichard Henderson 39209a52d85SRichard Henderson gen_srshr_vec(vece, t, a, sh); 39309a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 39409a52d85SRichard Henderson } 39509a52d85SRichard Henderson 39609a52d85SRichard Henderson void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 39709a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 39809a52d85SRichard Henderson { 39909a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 40009a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 40109a52d85SRichard Henderson }; 40209a52d85SRichard Henderson static const GVecGen2i ops[4] = { 40309a52d85SRichard Henderson { .fni8 = gen_srsra8_i64, 40409a52d85SRichard Henderson .fniv = gen_srsra_vec, 40509a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_b, 40609a52d85SRichard Henderson .opt_opc = vecop_list, 40709a52d85SRichard Henderson .load_dest = true, 40809a52d85SRichard Henderson .vece = MO_8 }, 40909a52d85SRichard Henderson { .fni8 = gen_srsra16_i64, 41009a52d85SRichard Henderson .fniv = gen_srsra_vec, 41109a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_h, 41209a52d85SRichard Henderson .opt_opc = vecop_list, 41309a52d85SRichard Henderson .load_dest = true, 41409a52d85SRichard Henderson .vece = MO_16 }, 41509a52d85SRichard Henderson { .fni4 = gen_srsra32_i32, 41609a52d85SRichard Henderson .fniv = gen_srsra_vec, 41709a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_s, 41809a52d85SRichard Henderson .opt_opc = vecop_list, 41909a52d85SRichard Henderson .load_dest = true, 42009a52d85SRichard Henderson .vece = MO_32 }, 42109a52d85SRichard Henderson { .fni8 = gen_srsra64_i64, 42209a52d85SRichard Henderson .fniv = gen_srsra_vec, 42309a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_d, 42409a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 42509a52d85SRichard Henderson .opt_opc = vecop_list, 42609a52d85SRichard Henderson .load_dest = true, 42709a52d85SRichard Henderson .vece = MO_64 }, 42809a52d85SRichard Henderson }; 42909a52d85SRichard Henderson 43009a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 43109a52d85SRichard Henderson tcg_debug_assert(shift > 0); 43209a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 43309a52d85SRichard Henderson 43409a52d85SRichard Henderson /* 43509a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 43609a52d85SRichard Henderson * Signed results in all sign bits. With rounding, this produces 43709a52d85SRichard Henderson * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. 43809a52d85SRichard Henderson * I.e. always zero. With accumulation, this leaves D unchanged. 43909a52d85SRichard Henderson */ 44009a52d85SRichard Henderson if (shift == (8 << vece)) { 44109a52d85SRichard Henderson /* Nop, but we do need to clear the tail. */ 44209a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 44309a52d85SRichard Henderson } else { 44409a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 44509a52d85SRichard Henderson } 44609a52d85SRichard Henderson } 44709a52d85SRichard Henderson 44809a52d85SRichard Henderson static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 44909a52d85SRichard Henderson { 45009a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 45109a52d85SRichard Henderson 45209a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 45309a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 45409a52d85SRichard Henderson tcg_gen_vec_shr8i_i64(d, a, sh); 45509a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 45609a52d85SRichard Henderson } 45709a52d85SRichard Henderson 45809a52d85SRichard Henderson static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 45909a52d85SRichard Henderson { 46009a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 46109a52d85SRichard Henderson 46209a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 46309a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 46409a52d85SRichard Henderson tcg_gen_vec_shr16i_i64(d, a, sh); 46509a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 46609a52d85SRichard Henderson } 46709a52d85SRichard Henderson 46809a52d85SRichard Henderson void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 46909a52d85SRichard Henderson { 47009a52d85SRichard Henderson TCGv_i32 t; 47109a52d85SRichard Henderson 47209a52d85SRichard Henderson /* Handle shift by the input size for the benefit of trans_URSHR_ri */ 47309a52d85SRichard Henderson if (sh == 32) { 47409a52d85SRichard Henderson tcg_gen_extract_i32(d, a, sh - 1, 1); 47509a52d85SRichard Henderson return; 47609a52d85SRichard Henderson } 47709a52d85SRichard Henderson t = tcg_temp_new_i32(); 47809a52d85SRichard Henderson tcg_gen_extract_i32(t, a, sh - 1, 1); 47909a52d85SRichard Henderson tcg_gen_shri_i32(d, a, sh); 48009a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 48109a52d85SRichard Henderson } 48209a52d85SRichard Henderson 48309a52d85SRichard Henderson void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 48409a52d85SRichard Henderson { 48509a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 48609a52d85SRichard Henderson 48709a52d85SRichard Henderson tcg_gen_extract_i64(t, a, sh - 1, 1); 48809a52d85SRichard Henderson tcg_gen_shri_i64(d, a, sh); 48909a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 49009a52d85SRichard Henderson } 49109a52d85SRichard Henderson 49209a52d85SRichard Henderson static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift) 49309a52d85SRichard Henderson { 49409a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 49509a52d85SRichard Henderson TCGv_vec ones = tcg_temp_new_vec_matching(d); 49609a52d85SRichard Henderson 49709a52d85SRichard Henderson tcg_gen_shri_vec(vece, t, a, shift - 1); 49809a52d85SRichard Henderson tcg_gen_dupi_vec(vece, ones, 1); 49909a52d85SRichard Henderson tcg_gen_and_vec(vece, t, t, ones); 50009a52d85SRichard Henderson tcg_gen_shri_vec(vece, d, a, shift); 50109a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 50209a52d85SRichard Henderson } 50309a52d85SRichard Henderson 50409a52d85SRichard Henderson void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 50509a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 50609a52d85SRichard Henderson { 50709a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 50809a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_add_vec, 0 50909a52d85SRichard Henderson }; 51009a52d85SRichard Henderson static const GVecGen2i ops[4] = { 51109a52d85SRichard Henderson { .fni8 = gen_urshr8_i64, 51209a52d85SRichard Henderson .fniv = gen_urshr_vec, 51309a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_b, 51409a52d85SRichard Henderson .opt_opc = vecop_list, 51509a52d85SRichard Henderson .vece = MO_8 }, 51609a52d85SRichard Henderson { .fni8 = gen_urshr16_i64, 51709a52d85SRichard Henderson .fniv = gen_urshr_vec, 51809a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_h, 51909a52d85SRichard Henderson .opt_opc = vecop_list, 52009a52d85SRichard Henderson .vece = MO_16 }, 52109a52d85SRichard Henderson { .fni4 = gen_urshr32_i32, 52209a52d85SRichard Henderson .fniv = gen_urshr_vec, 52309a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_s, 52409a52d85SRichard Henderson .opt_opc = vecop_list, 52509a52d85SRichard Henderson .vece = MO_32 }, 52609a52d85SRichard Henderson { .fni8 = gen_urshr64_i64, 52709a52d85SRichard Henderson .fniv = gen_urshr_vec, 52809a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_d, 52909a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 53009a52d85SRichard Henderson .opt_opc = vecop_list, 53109a52d85SRichard Henderson .vece = MO_64 }, 53209a52d85SRichard Henderson }; 53309a52d85SRichard Henderson 53409a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 53509a52d85SRichard Henderson tcg_debug_assert(shift > 0); 53609a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 53709a52d85SRichard Henderson 53809a52d85SRichard Henderson if (shift == (8 << vece)) { 53909a52d85SRichard Henderson /* 54009a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 54109a52d85SRichard Henderson * Unsigned results in zero. With rounding, this produces a 54209a52d85SRichard Henderson * copy of the most significant bit. 54309a52d85SRichard Henderson */ 54409a52d85SRichard Henderson tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz); 54509a52d85SRichard Henderson } else { 54609a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 54709a52d85SRichard Henderson } 54809a52d85SRichard Henderson } 54909a52d85SRichard Henderson 55009a52d85SRichard Henderson static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 55109a52d85SRichard Henderson { 55209a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 55309a52d85SRichard Henderson 55409a52d85SRichard Henderson if (sh == 8) { 55509a52d85SRichard Henderson tcg_gen_vec_shr8i_i64(t, a, 7); 55609a52d85SRichard Henderson } else { 55709a52d85SRichard Henderson gen_urshr8_i64(t, a, sh); 55809a52d85SRichard Henderson } 55909a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 56009a52d85SRichard Henderson } 56109a52d85SRichard Henderson 56209a52d85SRichard Henderson static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 56309a52d85SRichard Henderson { 56409a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 56509a52d85SRichard Henderson 56609a52d85SRichard Henderson if (sh == 16) { 56709a52d85SRichard Henderson tcg_gen_vec_shr16i_i64(t, a, 15); 56809a52d85SRichard Henderson } else { 56909a52d85SRichard Henderson gen_urshr16_i64(t, a, sh); 57009a52d85SRichard Henderson } 57109a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 57209a52d85SRichard Henderson } 57309a52d85SRichard Henderson 57409a52d85SRichard Henderson static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 57509a52d85SRichard Henderson { 57609a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 57709a52d85SRichard Henderson 57809a52d85SRichard Henderson if (sh == 32) { 57909a52d85SRichard Henderson tcg_gen_shri_i32(t, a, 31); 58009a52d85SRichard Henderson } else { 58109a52d85SRichard Henderson gen_urshr32_i32(t, a, sh); 58209a52d85SRichard Henderson } 58309a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 58409a52d85SRichard Henderson } 58509a52d85SRichard Henderson 58609a52d85SRichard Henderson static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 58709a52d85SRichard Henderson { 58809a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 58909a52d85SRichard Henderson 59009a52d85SRichard Henderson if (sh == 64) { 59109a52d85SRichard Henderson tcg_gen_shri_i64(t, a, 63); 59209a52d85SRichard Henderson } else { 59309a52d85SRichard Henderson gen_urshr64_i64(t, a, sh); 59409a52d85SRichard Henderson } 59509a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 59609a52d85SRichard Henderson } 59709a52d85SRichard Henderson 59809a52d85SRichard Henderson static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 59909a52d85SRichard Henderson { 60009a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 60109a52d85SRichard Henderson 60209a52d85SRichard Henderson if (sh == (8 << vece)) { 60309a52d85SRichard Henderson tcg_gen_shri_vec(vece, t, a, sh - 1); 60409a52d85SRichard Henderson } else { 60509a52d85SRichard Henderson gen_urshr_vec(vece, t, a, sh); 60609a52d85SRichard Henderson } 60709a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 60809a52d85SRichard Henderson } 60909a52d85SRichard Henderson 61009a52d85SRichard Henderson void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 61109a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 61209a52d85SRichard Henderson { 61309a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 61409a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_add_vec, 0 61509a52d85SRichard Henderson }; 61609a52d85SRichard Henderson static const GVecGen2i ops[4] = { 61709a52d85SRichard Henderson { .fni8 = gen_ursra8_i64, 61809a52d85SRichard Henderson .fniv = gen_ursra_vec, 61909a52d85SRichard Henderson .fno = gen_helper_gvec_ursra_b, 62009a52d85SRichard Henderson .opt_opc = vecop_list, 62109a52d85SRichard Henderson .load_dest = true, 62209a52d85SRichard Henderson .vece = MO_8 }, 62309a52d85SRichard Henderson { .fni8 = gen_ursra16_i64, 62409a52d85SRichard Henderson .fniv = gen_ursra_vec, 62509a52d85SRichard Henderson .fno = gen_helper_gvec_ursra_h, 62609a52d85SRichard Henderson .opt_opc = vecop_list, 62709a52d85SRichard Henderson .load_dest = true, 62809a52d85SRichard Henderson .vece = MO_16 }, 62909a52d85SRichard Henderson { .fni4 = gen_ursra32_i32, 63009a52d85SRichard Henderson .fniv = gen_ursra_vec, 63109a52d85SRichard Henderson .fno = gen_helper_gvec_ursra_s, 63209a52d85SRichard Henderson .opt_opc = vecop_list, 63309a52d85SRichard Henderson .load_dest = true, 63409a52d85SRichard Henderson .vece = MO_32 }, 63509a52d85SRichard Henderson { .fni8 = gen_ursra64_i64, 63609a52d85SRichard Henderson .fniv = gen_ursra_vec, 63709a52d85SRichard Henderson .fno = gen_helper_gvec_ursra_d, 63809a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 63909a52d85SRichard Henderson .opt_opc = vecop_list, 64009a52d85SRichard Henderson .load_dest = true, 64109a52d85SRichard Henderson .vece = MO_64 }, 64209a52d85SRichard Henderson }; 64309a52d85SRichard Henderson 64409a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 64509a52d85SRichard Henderson tcg_debug_assert(shift > 0); 64609a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 64709a52d85SRichard Henderson 64809a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 64909a52d85SRichard Henderson } 65009a52d85SRichard Henderson 65109a52d85SRichard Henderson static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 65209a52d85SRichard Henderson { 65309a52d85SRichard Henderson uint64_t mask = dup_const(MO_8, 0xff >> shift); 65409a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 65509a52d85SRichard Henderson 65609a52d85SRichard Henderson tcg_gen_shri_i64(t, a, shift); 65709a52d85SRichard Henderson tcg_gen_andi_i64(t, t, mask); 65809a52d85SRichard Henderson tcg_gen_andi_i64(d, d, ~mask); 65909a52d85SRichard Henderson tcg_gen_or_i64(d, d, t); 66009a52d85SRichard Henderson } 66109a52d85SRichard Henderson 66209a52d85SRichard Henderson static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 66309a52d85SRichard Henderson { 66409a52d85SRichard Henderson uint64_t mask = dup_const(MO_16, 0xffff >> shift); 66509a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 66609a52d85SRichard Henderson 66709a52d85SRichard Henderson tcg_gen_shri_i64(t, a, shift); 66809a52d85SRichard Henderson tcg_gen_andi_i64(t, t, mask); 66909a52d85SRichard Henderson tcg_gen_andi_i64(d, d, ~mask); 67009a52d85SRichard Henderson tcg_gen_or_i64(d, d, t); 67109a52d85SRichard Henderson } 67209a52d85SRichard Henderson 67309a52d85SRichard Henderson static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 67409a52d85SRichard Henderson { 67509a52d85SRichard Henderson tcg_gen_shri_i32(a, a, shift); 67609a52d85SRichard Henderson tcg_gen_deposit_i32(d, d, a, 0, 32 - shift); 67709a52d85SRichard Henderson } 67809a52d85SRichard Henderson 67909a52d85SRichard Henderson static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 68009a52d85SRichard Henderson { 68109a52d85SRichard Henderson tcg_gen_shri_i64(a, a, shift); 68209a52d85SRichard Henderson tcg_gen_deposit_i64(d, d, a, 0, 64 - shift); 68309a52d85SRichard Henderson } 68409a52d85SRichard Henderson 68509a52d85SRichard Henderson static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 68609a52d85SRichard Henderson { 68709a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 68809a52d85SRichard Henderson TCGv_vec m = tcg_temp_new_vec_matching(d); 68909a52d85SRichard Henderson 69009a52d85SRichard Henderson tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); 69109a52d85SRichard Henderson tcg_gen_shri_vec(vece, t, a, sh); 69209a52d85SRichard Henderson tcg_gen_and_vec(vece, d, d, m); 69309a52d85SRichard Henderson tcg_gen_or_vec(vece, d, d, t); 69409a52d85SRichard Henderson } 69509a52d85SRichard Henderson 69609a52d85SRichard Henderson void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 69709a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 69809a52d85SRichard Henderson { 69909a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 }; 70009a52d85SRichard Henderson const GVecGen2i ops[4] = { 70109a52d85SRichard Henderson { .fni8 = gen_shr8_ins_i64, 70209a52d85SRichard Henderson .fniv = gen_shr_ins_vec, 70309a52d85SRichard Henderson .fno = gen_helper_gvec_sri_b, 70409a52d85SRichard Henderson .load_dest = true, 70509a52d85SRichard Henderson .opt_opc = vecop_list, 70609a52d85SRichard Henderson .vece = MO_8 }, 70709a52d85SRichard Henderson { .fni8 = gen_shr16_ins_i64, 70809a52d85SRichard Henderson .fniv = gen_shr_ins_vec, 70909a52d85SRichard Henderson .fno = gen_helper_gvec_sri_h, 71009a52d85SRichard Henderson .load_dest = true, 71109a52d85SRichard Henderson .opt_opc = vecop_list, 71209a52d85SRichard Henderson .vece = MO_16 }, 71309a52d85SRichard Henderson { .fni4 = gen_shr32_ins_i32, 71409a52d85SRichard Henderson .fniv = gen_shr_ins_vec, 71509a52d85SRichard Henderson .fno = gen_helper_gvec_sri_s, 71609a52d85SRichard Henderson .load_dest = true, 71709a52d85SRichard Henderson .opt_opc = vecop_list, 71809a52d85SRichard Henderson .vece = MO_32 }, 71909a52d85SRichard Henderson { .fni8 = gen_shr64_ins_i64, 72009a52d85SRichard Henderson .fniv = gen_shr_ins_vec, 72109a52d85SRichard Henderson .fno = gen_helper_gvec_sri_d, 72209a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 72309a52d85SRichard Henderson .load_dest = true, 72409a52d85SRichard Henderson .opt_opc = vecop_list, 72509a52d85SRichard Henderson .vece = MO_64 }, 72609a52d85SRichard Henderson }; 72709a52d85SRichard Henderson 72809a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize]. */ 72909a52d85SRichard Henderson tcg_debug_assert(shift > 0); 73009a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 73109a52d85SRichard Henderson 73209a52d85SRichard Henderson /* Shift of esize leaves destination unchanged. */ 73309a52d85SRichard Henderson if (shift < (8 << vece)) { 73409a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 73509a52d85SRichard Henderson } else { 73609a52d85SRichard Henderson /* Nop, but we do need to clear the tail. */ 73709a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 73809a52d85SRichard Henderson } 73909a52d85SRichard Henderson } 74009a52d85SRichard Henderson 74109a52d85SRichard Henderson static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 74209a52d85SRichard Henderson { 74309a52d85SRichard Henderson uint64_t mask = dup_const(MO_8, 0xff << shift); 74409a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 74509a52d85SRichard Henderson 74609a52d85SRichard Henderson tcg_gen_shli_i64(t, a, shift); 74709a52d85SRichard Henderson tcg_gen_andi_i64(t, t, mask); 74809a52d85SRichard Henderson tcg_gen_andi_i64(d, d, ~mask); 74909a52d85SRichard Henderson tcg_gen_or_i64(d, d, t); 75009a52d85SRichard Henderson } 75109a52d85SRichard Henderson 75209a52d85SRichard Henderson static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 75309a52d85SRichard Henderson { 75409a52d85SRichard Henderson uint64_t mask = dup_const(MO_16, 0xffff << shift); 75509a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 75609a52d85SRichard Henderson 75709a52d85SRichard Henderson tcg_gen_shli_i64(t, a, shift); 75809a52d85SRichard Henderson tcg_gen_andi_i64(t, t, mask); 75909a52d85SRichard Henderson tcg_gen_andi_i64(d, d, ~mask); 76009a52d85SRichard Henderson tcg_gen_or_i64(d, d, t); 76109a52d85SRichard Henderson } 76209a52d85SRichard Henderson 76309a52d85SRichard Henderson static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 76409a52d85SRichard Henderson { 76509a52d85SRichard Henderson tcg_gen_deposit_i32(d, d, a, shift, 32 - shift); 76609a52d85SRichard Henderson } 76709a52d85SRichard Henderson 76809a52d85SRichard Henderson static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 76909a52d85SRichard Henderson { 77009a52d85SRichard Henderson tcg_gen_deposit_i64(d, d, a, shift, 64 - shift); 77109a52d85SRichard Henderson } 77209a52d85SRichard Henderson 77309a52d85SRichard Henderson static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 77409a52d85SRichard Henderson { 77509a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 77609a52d85SRichard Henderson TCGv_vec m = tcg_temp_new_vec_matching(d); 77709a52d85SRichard Henderson 77809a52d85SRichard Henderson tcg_gen_shli_vec(vece, t, a, sh); 77909a52d85SRichard Henderson tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh)); 78009a52d85SRichard Henderson tcg_gen_and_vec(vece, d, d, m); 78109a52d85SRichard Henderson tcg_gen_or_vec(vece, d, d, t); 78209a52d85SRichard Henderson } 78309a52d85SRichard Henderson 78409a52d85SRichard Henderson void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 78509a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 78609a52d85SRichard Henderson { 78709a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 }; 78809a52d85SRichard Henderson const GVecGen2i ops[4] = { 78909a52d85SRichard Henderson { .fni8 = gen_shl8_ins_i64, 79009a52d85SRichard Henderson .fniv = gen_shl_ins_vec, 79109a52d85SRichard Henderson .fno = gen_helper_gvec_sli_b, 79209a52d85SRichard Henderson .load_dest = true, 79309a52d85SRichard Henderson .opt_opc = vecop_list, 79409a52d85SRichard Henderson .vece = MO_8 }, 79509a52d85SRichard Henderson { .fni8 = gen_shl16_ins_i64, 79609a52d85SRichard Henderson .fniv = gen_shl_ins_vec, 79709a52d85SRichard Henderson .fno = gen_helper_gvec_sli_h, 79809a52d85SRichard Henderson .load_dest = true, 79909a52d85SRichard Henderson .opt_opc = vecop_list, 80009a52d85SRichard Henderson .vece = MO_16 }, 80109a52d85SRichard Henderson { .fni4 = gen_shl32_ins_i32, 80209a52d85SRichard Henderson .fniv = gen_shl_ins_vec, 80309a52d85SRichard Henderson .fno = gen_helper_gvec_sli_s, 80409a52d85SRichard Henderson .load_dest = true, 80509a52d85SRichard Henderson .opt_opc = vecop_list, 80609a52d85SRichard Henderson .vece = MO_32 }, 80709a52d85SRichard Henderson { .fni8 = gen_shl64_ins_i64, 80809a52d85SRichard Henderson .fniv = gen_shl_ins_vec, 80909a52d85SRichard Henderson .fno = gen_helper_gvec_sli_d, 81009a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 81109a52d85SRichard Henderson .load_dest = true, 81209a52d85SRichard Henderson .opt_opc = vecop_list, 81309a52d85SRichard Henderson .vece = MO_64 }, 81409a52d85SRichard Henderson }; 81509a52d85SRichard Henderson 81609a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [0..esize-1]. */ 81709a52d85SRichard Henderson tcg_debug_assert(shift >= 0); 81809a52d85SRichard Henderson tcg_debug_assert(shift < (8 << vece)); 81909a52d85SRichard Henderson 82009a52d85SRichard Henderson if (shift == 0) { 82109a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz); 82209a52d85SRichard Henderson } else { 82309a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 82409a52d85SRichard Henderson } 82509a52d85SRichard Henderson } 82609a52d85SRichard Henderson 82709a52d85SRichard Henderson static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 82809a52d85SRichard Henderson { 82909a52d85SRichard Henderson gen_helper_neon_mul_u8(a, a, b); 83009a52d85SRichard Henderson gen_helper_neon_add_u8(d, d, a); 83109a52d85SRichard Henderson } 83209a52d85SRichard Henderson 83309a52d85SRichard Henderson static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 83409a52d85SRichard Henderson { 83509a52d85SRichard Henderson gen_helper_neon_mul_u8(a, a, b); 83609a52d85SRichard Henderson gen_helper_neon_sub_u8(d, d, a); 83709a52d85SRichard Henderson } 83809a52d85SRichard Henderson 83909a52d85SRichard Henderson static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 84009a52d85SRichard Henderson { 84109a52d85SRichard Henderson gen_helper_neon_mul_u16(a, a, b); 84209a52d85SRichard Henderson gen_helper_neon_add_u16(d, d, a); 84309a52d85SRichard Henderson } 84409a52d85SRichard Henderson 84509a52d85SRichard Henderson static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 84609a52d85SRichard Henderson { 84709a52d85SRichard Henderson gen_helper_neon_mul_u16(a, a, b); 84809a52d85SRichard Henderson gen_helper_neon_sub_u16(d, d, a); 84909a52d85SRichard Henderson } 85009a52d85SRichard Henderson 85109a52d85SRichard Henderson static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 85209a52d85SRichard Henderson { 85309a52d85SRichard Henderson tcg_gen_mul_i32(a, a, b); 85409a52d85SRichard Henderson tcg_gen_add_i32(d, d, a); 85509a52d85SRichard Henderson } 85609a52d85SRichard Henderson 85709a52d85SRichard Henderson static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 85809a52d85SRichard Henderson { 85909a52d85SRichard Henderson tcg_gen_mul_i32(a, a, b); 86009a52d85SRichard Henderson tcg_gen_sub_i32(d, d, a); 86109a52d85SRichard Henderson } 86209a52d85SRichard Henderson 86309a52d85SRichard Henderson static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 86409a52d85SRichard Henderson { 86509a52d85SRichard Henderson tcg_gen_mul_i64(a, a, b); 86609a52d85SRichard Henderson tcg_gen_add_i64(d, d, a); 86709a52d85SRichard Henderson } 86809a52d85SRichard Henderson 86909a52d85SRichard Henderson static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 87009a52d85SRichard Henderson { 87109a52d85SRichard Henderson tcg_gen_mul_i64(a, a, b); 87209a52d85SRichard Henderson tcg_gen_sub_i64(d, d, a); 87309a52d85SRichard Henderson } 87409a52d85SRichard Henderson 87509a52d85SRichard Henderson static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 87609a52d85SRichard Henderson { 87709a52d85SRichard Henderson tcg_gen_mul_vec(vece, a, a, b); 87809a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, a); 87909a52d85SRichard Henderson } 88009a52d85SRichard Henderson 88109a52d85SRichard Henderson static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 88209a52d85SRichard Henderson { 88309a52d85SRichard Henderson tcg_gen_mul_vec(vece, a, a, b); 88409a52d85SRichard Henderson tcg_gen_sub_vec(vece, d, d, a); 88509a52d85SRichard Henderson } 88609a52d85SRichard Henderson 88709a52d85SRichard Henderson /* Note that while NEON does not support VMLA and VMLS as 64-bit ops, 88809a52d85SRichard Henderson * these tables are shared with AArch64 which does support them. 88909a52d85SRichard Henderson */ 89009a52d85SRichard Henderson void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 89109a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 89209a52d85SRichard Henderson { 89309a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 89409a52d85SRichard Henderson INDEX_op_mul_vec, INDEX_op_add_vec, 0 89509a52d85SRichard Henderson }; 89609a52d85SRichard Henderson static const GVecGen3 ops[4] = { 89709a52d85SRichard Henderson { .fni4 = gen_mla8_i32, 89809a52d85SRichard Henderson .fniv = gen_mla_vec, 89909a52d85SRichard Henderson .load_dest = true, 90009a52d85SRichard Henderson .opt_opc = vecop_list, 90109a52d85SRichard Henderson .vece = MO_8 }, 90209a52d85SRichard Henderson { .fni4 = gen_mla16_i32, 90309a52d85SRichard Henderson .fniv = gen_mla_vec, 90409a52d85SRichard Henderson .load_dest = true, 90509a52d85SRichard Henderson .opt_opc = vecop_list, 90609a52d85SRichard Henderson .vece = MO_16 }, 90709a52d85SRichard Henderson { .fni4 = gen_mla32_i32, 90809a52d85SRichard Henderson .fniv = gen_mla_vec, 90909a52d85SRichard Henderson .load_dest = true, 91009a52d85SRichard Henderson .opt_opc = vecop_list, 91109a52d85SRichard Henderson .vece = MO_32 }, 91209a52d85SRichard Henderson { .fni8 = gen_mla64_i64, 91309a52d85SRichard Henderson .fniv = gen_mla_vec, 91409a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 91509a52d85SRichard Henderson .load_dest = true, 91609a52d85SRichard Henderson .opt_opc = vecop_list, 91709a52d85SRichard Henderson .vece = MO_64 }, 91809a52d85SRichard Henderson }; 91909a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 92009a52d85SRichard Henderson } 92109a52d85SRichard Henderson 92209a52d85SRichard Henderson void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 92309a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 92409a52d85SRichard Henderson { 92509a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 92609a52d85SRichard Henderson INDEX_op_mul_vec, INDEX_op_sub_vec, 0 92709a52d85SRichard Henderson }; 92809a52d85SRichard Henderson static const GVecGen3 ops[4] = { 92909a52d85SRichard Henderson { .fni4 = gen_mls8_i32, 93009a52d85SRichard Henderson .fniv = gen_mls_vec, 93109a52d85SRichard Henderson .load_dest = true, 93209a52d85SRichard Henderson .opt_opc = vecop_list, 93309a52d85SRichard Henderson .vece = MO_8 }, 93409a52d85SRichard Henderson { .fni4 = gen_mls16_i32, 93509a52d85SRichard Henderson .fniv = gen_mls_vec, 93609a52d85SRichard Henderson .load_dest = true, 93709a52d85SRichard Henderson .opt_opc = vecop_list, 93809a52d85SRichard Henderson .vece = MO_16 }, 93909a52d85SRichard Henderson { .fni4 = gen_mls32_i32, 94009a52d85SRichard Henderson .fniv = gen_mls_vec, 94109a52d85SRichard Henderson .load_dest = true, 94209a52d85SRichard Henderson .opt_opc = vecop_list, 94309a52d85SRichard Henderson .vece = MO_32 }, 94409a52d85SRichard Henderson { .fni8 = gen_mls64_i64, 94509a52d85SRichard Henderson .fniv = gen_mls_vec, 94609a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 94709a52d85SRichard Henderson .load_dest = true, 94809a52d85SRichard Henderson .opt_opc = vecop_list, 94909a52d85SRichard Henderson .vece = MO_64 }, 95009a52d85SRichard Henderson }; 95109a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 95209a52d85SRichard Henderson } 95309a52d85SRichard Henderson 95409a52d85SRichard Henderson /* CMTST : test is "if (X & Y != 0)". */ 95509a52d85SRichard Henderson static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 95609a52d85SRichard Henderson { 957013506e0SRichard Henderson tcg_gen_negsetcond_i32(TCG_COND_TSTNE, d, a, b); 95809a52d85SRichard Henderson } 95909a52d85SRichard Henderson 96009a52d85SRichard Henderson void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 96109a52d85SRichard Henderson { 962013506e0SRichard Henderson tcg_gen_negsetcond_i64(TCG_COND_TSTNE, d, a, b); 96309a52d85SRichard Henderson } 96409a52d85SRichard Henderson 96509a52d85SRichard Henderson static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 96609a52d85SRichard Henderson { 9672310eb0aSRichard Henderson tcg_gen_cmp_vec(TCG_COND_TSTNE, vece, d, a, b); 96809a52d85SRichard Henderson } 96909a52d85SRichard Henderson 97009a52d85SRichard Henderson void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 97109a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 97209a52d85SRichard Henderson { 97309a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 }; 97409a52d85SRichard Henderson static const GVecGen3 ops[4] = { 97509a52d85SRichard Henderson { .fni4 = gen_helper_neon_tst_u8, 97609a52d85SRichard Henderson .fniv = gen_cmtst_vec, 97709a52d85SRichard Henderson .opt_opc = vecop_list, 97809a52d85SRichard Henderson .vece = MO_8 }, 97909a52d85SRichard Henderson { .fni4 = gen_helper_neon_tst_u16, 98009a52d85SRichard Henderson .fniv = gen_cmtst_vec, 98109a52d85SRichard Henderson .opt_opc = vecop_list, 98209a52d85SRichard Henderson .vece = MO_16 }, 98309a52d85SRichard Henderson { .fni4 = gen_cmtst_i32, 98409a52d85SRichard Henderson .fniv = gen_cmtst_vec, 98509a52d85SRichard Henderson .opt_opc = vecop_list, 98609a52d85SRichard Henderson .vece = MO_32 }, 98709a52d85SRichard Henderson { .fni8 = gen_cmtst_i64, 98809a52d85SRichard Henderson .fniv = gen_cmtst_vec, 98909a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 99009a52d85SRichard Henderson .opt_opc = vecop_list, 99109a52d85SRichard Henderson .vece = MO_64 }, 99209a52d85SRichard Henderson }; 99309a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 99409a52d85SRichard Henderson } 99509a52d85SRichard Henderson 99609a52d85SRichard Henderson void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) 99709a52d85SRichard Henderson { 99809a52d85SRichard Henderson TCGv_i32 lval = tcg_temp_new_i32(); 99909a52d85SRichard Henderson TCGv_i32 rval = tcg_temp_new_i32(); 100009a52d85SRichard Henderson TCGv_i32 lsh = tcg_temp_new_i32(); 100109a52d85SRichard Henderson TCGv_i32 rsh = tcg_temp_new_i32(); 100209a52d85SRichard Henderson TCGv_i32 zero = tcg_constant_i32(0); 100309a52d85SRichard Henderson TCGv_i32 max = tcg_constant_i32(32); 100409a52d85SRichard Henderson 100509a52d85SRichard Henderson /* 100609a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 100709a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 100809a52d85SRichard Henderson * Discard out-of-range results after the fact. 100909a52d85SRichard Henderson */ 101009a52d85SRichard Henderson tcg_gen_ext8s_i32(lsh, shift); 101109a52d85SRichard Henderson tcg_gen_neg_i32(rsh, lsh); 101209a52d85SRichard Henderson tcg_gen_shl_i32(lval, src, lsh); 101309a52d85SRichard Henderson tcg_gen_shr_i32(rval, src, rsh); 101409a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero); 101509a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst); 101609a52d85SRichard Henderson } 101709a52d85SRichard Henderson 101809a52d85SRichard Henderson void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) 101909a52d85SRichard Henderson { 102009a52d85SRichard Henderson TCGv_i64 lval = tcg_temp_new_i64(); 102109a52d85SRichard Henderson TCGv_i64 rval = tcg_temp_new_i64(); 102209a52d85SRichard Henderson TCGv_i64 lsh = tcg_temp_new_i64(); 102309a52d85SRichard Henderson TCGv_i64 rsh = tcg_temp_new_i64(); 102409a52d85SRichard Henderson TCGv_i64 zero = tcg_constant_i64(0); 102509a52d85SRichard Henderson TCGv_i64 max = tcg_constant_i64(64); 102609a52d85SRichard Henderson 102709a52d85SRichard Henderson /* 102809a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 102909a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 103009a52d85SRichard Henderson * Discard out-of-range results after the fact. 103109a52d85SRichard Henderson */ 103209a52d85SRichard Henderson tcg_gen_ext8s_i64(lsh, shift); 103309a52d85SRichard Henderson tcg_gen_neg_i64(rsh, lsh); 103409a52d85SRichard Henderson tcg_gen_shl_i64(lval, src, lsh); 103509a52d85SRichard Henderson tcg_gen_shr_i64(rval, src, rsh); 103609a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero); 103709a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst); 103809a52d85SRichard Henderson } 103909a52d85SRichard Henderson 104009a52d85SRichard Henderson static void gen_ushl_vec(unsigned vece, TCGv_vec dst, 104109a52d85SRichard Henderson TCGv_vec src, TCGv_vec shift) 104209a52d85SRichard Henderson { 104309a52d85SRichard Henderson TCGv_vec lval = tcg_temp_new_vec_matching(dst); 104409a52d85SRichard Henderson TCGv_vec rval = tcg_temp_new_vec_matching(dst); 104509a52d85SRichard Henderson TCGv_vec lsh = tcg_temp_new_vec_matching(dst); 104609a52d85SRichard Henderson TCGv_vec rsh = tcg_temp_new_vec_matching(dst); 104709a52d85SRichard Henderson TCGv_vec msk, max; 104809a52d85SRichard Henderson 104909a52d85SRichard Henderson tcg_gen_neg_vec(vece, rsh, shift); 105009a52d85SRichard Henderson if (vece == MO_8) { 105109a52d85SRichard Henderson tcg_gen_mov_vec(lsh, shift); 105209a52d85SRichard Henderson } else { 105309a52d85SRichard Henderson msk = tcg_temp_new_vec_matching(dst); 105409a52d85SRichard Henderson tcg_gen_dupi_vec(vece, msk, 0xff); 105509a52d85SRichard Henderson tcg_gen_and_vec(vece, lsh, shift, msk); 105609a52d85SRichard Henderson tcg_gen_and_vec(vece, rsh, rsh, msk); 105709a52d85SRichard Henderson } 105809a52d85SRichard Henderson 105909a52d85SRichard Henderson /* 106009a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 106109a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 106209a52d85SRichard Henderson * Discard out-of-range results after the fact. 106309a52d85SRichard Henderson */ 106409a52d85SRichard Henderson tcg_gen_shlv_vec(vece, lval, src, lsh); 106509a52d85SRichard Henderson tcg_gen_shrv_vec(vece, rval, src, rsh); 106609a52d85SRichard Henderson 106709a52d85SRichard Henderson max = tcg_temp_new_vec_matching(dst); 106809a52d85SRichard Henderson tcg_gen_dupi_vec(vece, max, 8 << vece); 106909a52d85SRichard Henderson 107009a52d85SRichard Henderson /* 107109a52d85SRichard Henderson * The choice of LT (signed) and GEU (unsigned) are biased toward 107209a52d85SRichard Henderson * the instructions of the x86_64 host. For MO_8, the whole byte 107309a52d85SRichard Henderson * is significant so we must use an unsigned compare; otherwise we 107409a52d85SRichard Henderson * have already masked to a byte and so a signed compare works. 107509a52d85SRichard Henderson * Other tcg hosts have a full set of comparisons and do not care. 107609a52d85SRichard Henderson */ 107709a52d85SRichard Henderson if (vece == MO_8) { 107809a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max); 107909a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max); 108009a52d85SRichard Henderson tcg_gen_andc_vec(vece, lval, lval, lsh); 108109a52d85SRichard Henderson tcg_gen_andc_vec(vece, rval, rval, rsh); 108209a52d85SRichard Henderson } else { 108309a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max); 108409a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max); 108509a52d85SRichard Henderson tcg_gen_and_vec(vece, lval, lval, lsh); 108609a52d85SRichard Henderson tcg_gen_and_vec(vece, rval, rval, rsh); 108709a52d85SRichard Henderson } 108809a52d85SRichard Henderson tcg_gen_or_vec(vece, dst, lval, rval); 108909a52d85SRichard Henderson } 109009a52d85SRichard Henderson 109109a52d85SRichard Henderson void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 109209a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 109309a52d85SRichard Henderson { 109409a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 109509a52d85SRichard Henderson INDEX_op_neg_vec, INDEX_op_shlv_vec, 109609a52d85SRichard Henderson INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 109709a52d85SRichard Henderson }; 109809a52d85SRichard Henderson static const GVecGen3 ops[4] = { 109909a52d85SRichard Henderson { .fniv = gen_ushl_vec, 110009a52d85SRichard Henderson .fno = gen_helper_gvec_ushl_b, 110109a52d85SRichard Henderson .opt_opc = vecop_list, 110209a52d85SRichard Henderson .vece = MO_8 }, 110309a52d85SRichard Henderson { .fniv = gen_ushl_vec, 110409a52d85SRichard Henderson .fno = gen_helper_gvec_ushl_h, 110509a52d85SRichard Henderson .opt_opc = vecop_list, 110609a52d85SRichard Henderson .vece = MO_16 }, 110709a52d85SRichard Henderson { .fni4 = gen_ushl_i32, 110809a52d85SRichard Henderson .fniv = gen_ushl_vec, 110909a52d85SRichard Henderson .opt_opc = vecop_list, 111009a52d85SRichard Henderson .vece = MO_32 }, 111109a52d85SRichard Henderson { .fni8 = gen_ushl_i64, 111209a52d85SRichard Henderson .fniv = gen_ushl_vec, 111309a52d85SRichard Henderson .opt_opc = vecop_list, 111409a52d85SRichard Henderson .vece = MO_64 }, 111509a52d85SRichard Henderson }; 111609a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 111709a52d85SRichard Henderson } 111809a52d85SRichard Henderson 111909a52d85SRichard Henderson void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) 112009a52d85SRichard Henderson { 112109a52d85SRichard Henderson TCGv_i32 lval = tcg_temp_new_i32(); 112209a52d85SRichard Henderson TCGv_i32 rval = tcg_temp_new_i32(); 112309a52d85SRichard Henderson TCGv_i32 lsh = tcg_temp_new_i32(); 112409a52d85SRichard Henderson TCGv_i32 rsh = tcg_temp_new_i32(); 112509a52d85SRichard Henderson TCGv_i32 zero = tcg_constant_i32(0); 112609a52d85SRichard Henderson TCGv_i32 max = tcg_constant_i32(31); 112709a52d85SRichard Henderson 112809a52d85SRichard Henderson /* 112909a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 113009a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 113109a52d85SRichard Henderson * Discard out-of-range results after the fact. 113209a52d85SRichard Henderson */ 113309a52d85SRichard Henderson tcg_gen_ext8s_i32(lsh, shift); 113409a52d85SRichard Henderson tcg_gen_neg_i32(rsh, lsh); 113509a52d85SRichard Henderson tcg_gen_shl_i32(lval, src, lsh); 113609a52d85SRichard Henderson tcg_gen_umin_i32(rsh, rsh, max); 113709a52d85SRichard Henderson tcg_gen_sar_i32(rval, src, rsh); 113809a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero); 113909a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval); 114009a52d85SRichard Henderson } 114109a52d85SRichard Henderson 114209a52d85SRichard Henderson void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) 114309a52d85SRichard Henderson { 114409a52d85SRichard Henderson TCGv_i64 lval = tcg_temp_new_i64(); 114509a52d85SRichard Henderson TCGv_i64 rval = tcg_temp_new_i64(); 114609a52d85SRichard Henderson TCGv_i64 lsh = tcg_temp_new_i64(); 114709a52d85SRichard Henderson TCGv_i64 rsh = tcg_temp_new_i64(); 114809a52d85SRichard Henderson TCGv_i64 zero = tcg_constant_i64(0); 114909a52d85SRichard Henderson TCGv_i64 max = tcg_constant_i64(63); 115009a52d85SRichard Henderson 115109a52d85SRichard Henderson /* 115209a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 115309a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 115409a52d85SRichard Henderson * Discard out-of-range results after the fact. 115509a52d85SRichard Henderson */ 115609a52d85SRichard Henderson tcg_gen_ext8s_i64(lsh, shift); 115709a52d85SRichard Henderson tcg_gen_neg_i64(rsh, lsh); 115809a52d85SRichard Henderson tcg_gen_shl_i64(lval, src, lsh); 115909a52d85SRichard Henderson tcg_gen_umin_i64(rsh, rsh, max); 116009a52d85SRichard Henderson tcg_gen_sar_i64(rval, src, rsh); 116109a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero); 116209a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval); 116309a52d85SRichard Henderson } 116409a52d85SRichard Henderson 116509a52d85SRichard Henderson static void gen_sshl_vec(unsigned vece, TCGv_vec dst, 116609a52d85SRichard Henderson TCGv_vec src, TCGv_vec shift) 116709a52d85SRichard Henderson { 116809a52d85SRichard Henderson TCGv_vec lval = tcg_temp_new_vec_matching(dst); 116909a52d85SRichard Henderson TCGv_vec rval = tcg_temp_new_vec_matching(dst); 117009a52d85SRichard Henderson TCGv_vec lsh = tcg_temp_new_vec_matching(dst); 117109a52d85SRichard Henderson TCGv_vec rsh = tcg_temp_new_vec_matching(dst); 117209a52d85SRichard Henderson TCGv_vec tmp = tcg_temp_new_vec_matching(dst); 117309a52d85SRichard Henderson 117409a52d85SRichard Henderson /* 117509a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 117609a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 117709a52d85SRichard Henderson * Discard out-of-range results after the fact. 117809a52d85SRichard Henderson */ 117909a52d85SRichard Henderson tcg_gen_neg_vec(vece, rsh, shift); 118009a52d85SRichard Henderson if (vece == MO_8) { 118109a52d85SRichard Henderson tcg_gen_mov_vec(lsh, shift); 118209a52d85SRichard Henderson } else { 118309a52d85SRichard Henderson tcg_gen_dupi_vec(vece, tmp, 0xff); 118409a52d85SRichard Henderson tcg_gen_and_vec(vece, lsh, shift, tmp); 118509a52d85SRichard Henderson tcg_gen_and_vec(vece, rsh, rsh, tmp); 118609a52d85SRichard Henderson } 118709a52d85SRichard Henderson 118809a52d85SRichard Henderson /* Bound rsh so out of bound right shift gets -1. */ 118909a52d85SRichard Henderson tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1); 119009a52d85SRichard Henderson tcg_gen_umin_vec(vece, rsh, rsh, tmp); 119109a52d85SRichard Henderson tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp); 119209a52d85SRichard Henderson 119309a52d85SRichard Henderson tcg_gen_shlv_vec(vece, lval, src, lsh); 119409a52d85SRichard Henderson tcg_gen_sarv_vec(vece, rval, src, rsh); 119509a52d85SRichard Henderson 119609a52d85SRichard Henderson /* Select in-bound left shift. */ 119709a52d85SRichard Henderson tcg_gen_andc_vec(vece, lval, lval, tmp); 119809a52d85SRichard Henderson 119909a52d85SRichard Henderson /* Select between left and right shift. */ 120009a52d85SRichard Henderson if (vece == MO_8) { 120109a52d85SRichard Henderson tcg_gen_dupi_vec(vece, tmp, 0); 120209a52d85SRichard Henderson tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval); 120309a52d85SRichard Henderson } else { 120409a52d85SRichard Henderson tcg_gen_dupi_vec(vece, tmp, 0x80); 120509a52d85SRichard Henderson tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval); 120609a52d85SRichard Henderson } 120709a52d85SRichard Henderson } 120809a52d85SRichard Henderson 120909a52d85SRichard Henderson void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 121009a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 121109a52d85SRichard Henderson { 121209a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 121309a52d85SRichard Henderson INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, 121409a52d85SRichard Henderson INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 121509a52d85SRichard Henderson }; 121609a52d85SRichard Henderson static const GVecGen3 ops[4] = { 121709a52d85SRichard Henderson { .fniv = gen_sshl_vec, 121809a52d85SRichard Henderson .fno = gen_helper_gvec_sshl_b, 121909a52d85SRichard Henderson .opt_opc = vecop_list, 122009a52d85SRichard Henderson .vece = MO_8 }, 122109a52d85SRichard Henderson { .fniv = gen_sshl_vec, 122209a52d85SRichard Henderson .fno = gen_helper_gvec_sshl_h, 122309a52d85SRichard Henderson .opt_opc = vecop_list, 122409a52d85SRichard Henderson .vece = MO_16 }, 122509a52d85SRichard Henderson { .fni4 = gen_sshl_i32, 122609a52d85SRichard Henderson .fniv = gen_sshl_vec, 122709a52d85SRichard Henderson .opt_opc = vecop_list, 122809a52d85SRichard Henderson .vece = MO_32 }, 122909a52d85SRichard Henderson { .fni8 = gen_sshl_i64, 123009a52d85SRichard Henderson .fniv = gen_sshl_vec, 123109a52d85SRichard Henderson .opt_opc = vecop_list, 123209a52d85SRichard Henderson .vece = MO_64 }, 123309a52d85SRichard Henderson }; 123409a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 123509a52d85SRichard Henderson } 123609a52d85SRichard Henderson 1237940392c8SRichard Henderson void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1238940392c8SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1239940392c8SRichard Henderson { 1240940392c8SRichard Henderson static gen_helper_gvec_3 * const fns[] = { 1241940392c8SRichard Henderson gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h, 1242940392c8SRichard Henderson gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d, 1243940392c8SRichard Henderson }; 1244940392c8SRichard Henderson tcg_debug_assert(vece <= MO_64); 1245940392c8SRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1246940392c8SRichard Henderson } 1247940392c8SRichard Henderson 1248940392c8SRichard Henderson void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1249940392c8SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1250940392c8SRichard Henderson { 1251940392c8SRichard Henderson static gen_helper_gvec_3 * const fns[] = { 1252940392c8SRichard Henderson gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h, 1253940392c8SRichard Henderson gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d, 1254940392c8SRichard Henderson }; 1255940392c8SRichard Henderson tcg_debug_assert(vece <= MO_64); 1256940392c8SRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1257940392c8SRichard Henderson } 1258940392c8SRichard Henderson 1259e72a6878SRichard Henderson void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1260e72a6878SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1261e72a6878SRichard Henderson { 1262e72a6878SRichard Henderson static gen_helper_gvec_3_ptr * const fns[] = { 1263e72a6878SRichard Henderson gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h, 1264e72a6878SRichard Henderson gen_helper_neon_sqshl_s, gen_helper_neon_sqshl_d, 1265e72a6878SRichard Henderson }; 1266e72a6878SRichard Henderson tcg_debug_assert(vece <= MO_64); 1267e72a6878SRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, 1268e72a6878SRichard Henderson opr_sz, max_sz, 0, fns[vece]); 1269e72a6878SRichard Henderson } 1270e72a6878SRichard Henderson 1271e72a6878SRichard Henderson void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1272e72a6878SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1273e72a6878SRichard Henderson { 1274e72a6878SRichard Henderson static gen_helper_gvec_3_ptr * const fns[] = { 1275e72a6878SRichard Henderson gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h, 1276e72a6878SRichard Henderson gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d, 1277e72a6878SRichard Henderson }; 1278e72a6878SRichard Henderson tcg_debug_assert(vece <= MO_64); 1279e72a6878SRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, 1280e72a6878SRichard Henderson opr_sz, max_sz, 0, fns[vece]); 1281e72a6878SRichard Henderson } 1282e72a6878SRichard Henderson 1283cef9d54fSRichard Henderson void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1284cef9d54fSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1285cef9d54fSRichard Henderson { 1286cef9d54fSRichard Henderson static gen_helper_gvec_3_ptr * const fns[] = { 1287cef9d54fSRichard Henderson gen_helper_neon_sqrshl_b, gen_helper_neon_sqrshl_h, 1288cef9d54fSRichard Henderson gen_helper_neon_sqrshl_s, gen_helper_neon_sqrshl_d, 1289cef9d54fSRichard Henderson }; 1290cef9d54fSRichard Henderson tcg_debug_assert(vece <= MO_64); 1291cef9d54fSRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, 1292cef9d54fSRichard Henderson opr_sz, max_sz, 0, fns[vece]); 1293cef9d54fSRichard Henderson } 1294cef9d54fSRichard Henderson 1295cef9d54fSRichard Henderson void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1296cef9d54fSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1297cef9d54fSRichard Henderson { 1298cef9d54fSRichard Henderson static gen_helper_gvec_3_ptr * const fns[] = { 1299cef9d54fSRichard Henderson gen_helper_neon_uqrshl_b, gen_helper_neon_uqrshl_h, 1300cef9d54fSRichard Henderson gen_helper_neon_uqrshl_s, gen_helper_neon_uqrshl_d, 1301cef9d54fSRichard Henderson }; 1302cef9d54fSRichard Henderson tcg_debug_assert(vece <= MO_64); 1303cef9d54fSRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, 1304cef9d54fSRichard Henderson opr_sz, max_sz, 0, fns[vece]); 1305cef9d54fSRichard Henderson } 1306cef9d54fSRichard Henderson 1307f4fa83d6SRichard Henderson void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1308f4fa83d6SRichard Henderson { 1309f4fa83d6SRichard Henderson uint64_t max = MAKE_64BIT_MASK(0, 8 << esz); 1310f4fa83d6SRichard Henderson TCGv_i64 tmp = tcg_temp_new_i64(); 1311f4fa83d6SRichard Henderson 1312f4fa83d6SRichard Henderson tcg_gen_add_i64(tmp, a, b); 1313f4fa83d6SRichard Henderson tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max)); 1314f4fa83d6SRichard Henderson tcg_gen_xor_i64(tmp, tmp, res); 1315f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, tmp); 1316f4fa83d6SRichard Henderson } 1317f4fa83d6SRichard Henderson 1318f4fa83d6SRichard Henderson void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1319f4fa83d6SRichard Henderson { 1320f4fa83d6SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 1321f4fa83d6SRichard Henderson 1322f4fa83d6SRichard Henderson tcg_gen_add_i64(t, a, b); 1323f4fa83d6SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a, 1324f4fa83d6SRichard Henderson tcg_constant_i64(UINT64_MAX), t); 1325f4fa83d6SRichard Henderson tcg_gen_xor_i64(t, t, res); 1326f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, t); 1327f4fa83d6SRichard Henderson } 1328f4fa83d6SRichard Henderson 132976f4a8aeSRichard Henderson static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 133009a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 133109a52d85SRichard Henderson { 133209a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 133309a52d85SRichard Henderson tcg_gen_add_vec(vece, x, a, b); 133409a52d85SRichard Henderson tcg_gen_usadd_vec(vece, t, a, b); 133576f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 133676f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 133709a52d85SRichard Henderson } 133809a52d85SRichard Henderson 133909a52d85SRichard Henderson void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 134009a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 134109a52d85SRichard Henderson { 134209a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 134376f4a8aeSRichard Henderson INDEX_op_usadd_vec, INDEX_op_add_vec, 0 134409a52d85SRichard Henderson }; 134509a52d85SRichard Henderson static const GVecGen4 ops[4] = { 134609a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 134709a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_b, 134809a52d85SRichard Henderson .write_aofs = true, 134909a52d85SRichard Henderson .opt_opc = vecop_list, 135009a52d85SRichard Henderson .vece = MO_8 }, 135109a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 135209a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_h, 135309a52d85SRichard Henderson .write_aofs = true, 135409a52d85SRichard Henderson .opt_opc = vecop_list, 135509a52d85SRichard Henderson .vece = MO_16 }, 135609a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 135709a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_s, 135809a52d85SRichard Henderson .write_aofs = true, 135909a52d85SRichard Henderson .opt_opc = vecop_list, 136009a52d85SRichard Henderson .vece = MO_32 }, 136109a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 1362f4fa83d6SRichard Henderson .fni8 = gen_uqadd_d, 136309a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_d, 136409a52d85SRichard Henderson .write_aofs = true, 136509a52d85SRichard Henderson .opt_opc = vecop_list, 136609a52d85SRichard Henderson .vece = MO_64 }, 136709a52d85SRichard Henderson }; 136801d5665bSRichard Henderson 136901d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 137009a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 137109a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 137209a52d85SRichard Henderson } 137309a52d85SRichard Henderson 1374f4fa83d6SRichard Henderson void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1375f4fa83d6SRichard Henderson { 1376f4fa83d6SRichard Henderson int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1); 1377f4fa83d6SRichard Henderson int64_t min = -1ll - max; 1378f4fa83d6SRichard Henderson TCGv_i64 tmp = tcg_temp_new_i64(); 1379f4fa83d6SRichard Henderson 1380f4fa83d6SRichard Henderson tcg_gen_add_i64(tmp, a, b); 1381f4fa83d6SRichard Henderson tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max)); 1382f4fa83d6SRichard Henderson tcg_gen_smax_i64(res, res, tcg_constant_i64(min)); 1383f4fa83d6SRichard Henderson tcg_gen_xor_i64(tmp, tmp, res); 1384f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, tmp); 1385f4fa83d6SRichard Henderson } 1386f4fa83d6SRichard Henderson 1387f4fa83d6SRichard Henderson void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1388f4fa83d6SRichard Henderson { 1389f4fa83d6SRichard Henderson TCGv_i64 t0 = tcg_temp_new_i64(); 1390f4fa83d6SRichard Henderson TCGv_i64 t1 = tcg_temp_new_i64(); 1391f4fa83d6SRichard Henderson TCGv_i64 t2 = tcg_temp_new_i64(); 1392f4fa83d6SRichard Henderson 1393f4fa83d6SRichard Henderson tcg_gen_add_i64(t0, a, b); 1394f4fa83d6SRichard Henderson 1395f4fa83d6SRichard Henderson /* Compute signed overflow indication into T1 */ 1396f4fa83d6SRichard Henderson tcg_gen_xor_i64(t1, a, b); 1397f4fa83d6SRichard Henderson tcg_gen_xor_i64(t2, t0, a); 1398f4fa83d6SRichard Henderson tcg_gen_andc_i64(t1, t2, t1); 1399f4fa83d6SRichard Henderson 1400f4fa83d6SRichard Henderson /* Compute saturated value into T2 */ 1401f4fa83d6SRichard Henderson tcg_gen_sari_i64(t2, a, 63); 1402f4fa83d6SRichard Henderson tcg_gen_xori_i64(t2, t2, INT64_MAX); 1403f4fa83d6SRichard Henderson 1404f4fa83d6SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0); 1405f4fa83d6SRichard Henderson tcg_gen_xor_i64(t0, t0, res); 1406f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, t0); 1407f4fa83d6SRichard Henderson } 1408f4fa83d6SRichard Henderson 140976f4a8aeSRichard Henderson static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 141009a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 141109a52d85SRichard Henderson { 141209a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 141309a52d85SRichard Henderson tcg_gen_add_vec(vece, x, a, b); 141409a52d85SRichard Henderson tcg_gen_ssadd_vec(vece, t, a, b); 141576f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 141676f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 141709a52d85SRichard Henderson } 141809a52d85SRichard Henderson 141909a52d85SRichard Henderson void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 142009a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 142109a52d85SRichard Henderson { 142209a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 142376f4a8aeSRichard Henderson INDEX_op_ssadd_vec, INDEX_op_add_vec, 0 142409a52d85SRichard Henderson }; 142509a52d85SRichard Henderson static const GVecGen4 ops[4] = { 142609a52d85SRichard Henderson { .fniv = gen_sqadd_vec, 142709a52d85SRichard Henderson .fno = gen_helper_gvec_sqadd_b, 142809a52d85SRichard Henderson .opt_opc = vecop_list, 142909a52d85SRichard Henderson .write_aofs = true, 143009a52d85SRichard Henderson .vece = MO_8 }, 143109a52d85SRichard Henderson { .fniv = gen_sqadd_vec, 143209a52d85SRichard Henderson .fno = gen_helper_gvec_sqadd_h, 143309a52d85SRichard Henderson .opt_opc = vecop_list, 143409a52d85SRichard Henderson .write_aofs = true, 143509a52d85SRichard Henderson .vece = MO_16 }, 143609a52d85SRichard Henderson { .fniv = gen_sqadd_vec, 143709a52d85SRichard Henderson .fno = gen_helper_gvec_sqadd_s, 143809a52d85SRichard Henderson .opt_opc = vecop_list, 143909a52d85SRichard Henderson .write_aofs = true, 144009a52d85SRichard Henderson .vece = MO_32 }, 144109a52d85SRichard Henderson { .fniv = gen_sqadd_vec, 1442f4fa83d6SRichard Henderson .fni8 = gen_sqadd_d, 144309a52d85SRichard Henderson .fno = gen_helper_gvec_sqadd_d, 144409a52d85SRichard Henderson .opt_opc = vecop_list, 144509a52d85SRichard Henderson .write_aofs = true, 144609a52d85SRichard Henderson .vece = MO_64 }, 144709a52d85SRichard Henderson }; 144801d5665bSRichard Henderson 144901d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 145009a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 145109a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 145209a52d85SRichard Henderson } 145309a52d85SRichard Henderson 1454f4fa83d6SRichard Henderson void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1455f4fa83d6SRichard Henderson { 1456f4fa83d6SRichard Henderson TCGv_i64 tmp = tcg_temp_new_i64(); 1457f4fa83d6SRichard Henderson 1458f4fa83d6SRichard Henderson tcg_gen_sub_i64(tmp, a, b); 1459f4fa83d6SRichard Henderson tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0)); 1460f4fa83d6SRichard Henderson tcg_gen_xor_i64(tmp, tmp, res); 1461f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, tmp); 1462f4fa83d6SRichard Henderson } 1463f4fa83d6SRichard Henderson 1464f4fa83d6SRichard Henderson void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1465f4fa83d6SRichard Henderson { 1466f4fa83d6SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 1467f4fa83d6SRichard Henderson 1468f4fa83d6SRichard Henderson tcg_gen_sub_i64(t, a, b); 1469f4fa83d6SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), t); 1470f4fa83d6SRichard Henderson tcg_gen_xor_i64(t, t, res); 1471f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, t); 1472f4fa83d6SRichard Henderson } 1473f4fa83d6SRichard Henderson 147476f4a8aeSRichard Henderson static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 147509a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 147609a52d85SRichard Henderson { 147709a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 147809a52d85SRichard Henderson tcg_gen_sub_vec(vece, x, a, b); 147909a52d85SRichard Henderson tcg_gen_ussub_vec(vece, t, a, b); 148076f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 148176f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 148209a52d85SRichard Henderson } 148309a52d85SRichard Henderson 148409a52d85SRichard Henderson void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 148509a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 148609a52d85SRichard Henderson { 148709a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 148876f4a8aeSRichard Henderson INDEX_op_ussub_vec, INDEX_op_sub_vec, 0 148909a52d85SRichard Henderson }; 149009a52d85SRichard Henderson static const GVecGen4 ops[4] = { 149109a52d85SRichard Henderson { .fniv = gen_uqsub_vec, 149209a52d85SRichard Henderson .fno = gen_helper_gvec_uqsub_b, 149309a52d85SRichard Henderson .opt_opc = vecop_list, 149409a52d85SRichard Henderson .write_aofs = true, 149509a52d85SRichard Henderson .vece = MO_8 }, 149609a52d85SRichard Henderson { .fniv = gen_uqsub_vec, 149709a52d85SRichard Henderson .fno = gen_helper_gvec_uqsub_h, 149809a52d85SRichard Henderson .opt_opc = vecop_list, 149909a52d85SRichard Henderson .write_aofs = true, 150009a52d85SRichard Henderson .vece = MO_16 }, 150109a52d85SRichard Henderson { .fniv = gen_uqsub_vec, 150209a52d85SRichard Henderson .fno = gen_helper_gvec_uqsub_s, 150309a52d85SRichard Henderson .opt_opc = vecop_list, 150409a52d85SRichard Henderson .write_aofs = true, 150509a52d85SRichard Henderson .vece = MO_32 }, 150609a52d85SRichard Henderson { .fniv = gen_uqsub_vec, 1507f4fa83d6SRichard Henderson .fni8 = gen_uqsub_d, 150809a52d85SRichard Henderson .fno = gen_helper_gvec_uqsub_d, 150909a52d85SRichard Henderson .opt_opc = vecop_list, 151009a52d85SRichard Henderson .write_aofs = true, 151109a52d85SRichard Henderson .vece = MO_64 }, 151209a52d85SRichard Henderson }; 151301d5665bSRichard Henderson 151401d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 151509a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 151609a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 151709a52d85SRichard Henderson } 151809a52d85SRichard Henderson 1519f4fa83d6SRichard Henderson void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1520f4fa83d6SRichard Henderson { 1521f4fa83d6SRichard Henderson int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1); 1522f4fa83d6SRichard Henderson int64_t min = -1ll - max; 1523f4fa83d6SRichard Henderson TCGv_i64 tmp = tcg_temp_new_i64(); 1524f4fa83d6SRichard Henderson 1525f4fa83d6SRichard Henderson tcg_gen_sub_i64(tmp, a, b); 1526f4fa83d6SRichard Henderson tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max)); 1527f4fa83d6SRichard Henderson tcg_gen_smax_i64(res, res, tcg_constant_i64(min)); 1528f4fa83d6SRichard Henderson tcg_gen_xor_i64(tmp, tmp, res); 1529f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, tmp); 1530f4fa83d6SRichard Henderson } 1531f4fa83d6SRichard Henderson 1532f4fa83d6SRichard Henderson void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1533f4fa83d6SRichard Henderson { 1534f4fa83d6SRichard Henderson TCGv_i64 t0 = tcg_temp_new_i64(); 1535f4fa83d6SRichard Henderson TCGv_i64 t1 = tcg_temp_new_i64(); 1536f4fa83d6SRichard Henderson TCGv_i64 t2 = tcg_temp_new_i64(); 1537f4fa83d6SRichard Henderson 1538f4fa83d6SRichard Henderson tcg_gen_sub_i64(t0, a, b); 1539f4fa83d6SRichard Henderson 1540f4fa83d6SRichard Henderson /* Compute signed overflow indication into T1 */ 1541f4fa83d6SRichard Henderson tcg_gen_xor_i64(t1, a, b); 1542f4fa83d6SRichard Henderson tcg_gen_xor_i64(t2, t0, a); 1543f4fa83d6SRichard Henderson tcg_gen_and_i64(t1, t1, t2); 1544f4fa83d6SRichard Henderson 1545f4fa83d6SRichard Henderson /* Compute saturated value into T2 */ 1546f4fa83d6SRichard Henderson tcg_gen_sari_i64(t2, a, 63); 1547f4fa83d6SRichard Henderson tcg_gen_xori_i64(t2, t2, INT64_MAX); 1548f4fa83d6SRichard Henderson 1549f4fa83d6SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0); 1550f4fa83d6SRichard Henderson tcg_gen_xor_i64(t0, t0, res); 1551f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, t0); 1552f4fa83d6SRichard Henderson } 1553f4fa83d6SRichard Henderson 155476f4a8aeSRichard Henderson static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 155509a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 155609a52d85SRichard Henderson { 155709a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 155809a52d85SRichard Henderson tcg_gen_sub_vec(vece, x, a, b); 155909a52d85SRichard Henderson tcg_gen_sssub_vec(vece, t, a, b); 156076f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 156176f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 156209a52d85SRichard Henderson } 156309a52d85SRichard Henderson 156409a52d85SRichard Henderson void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 156509a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 156609a52d85SRichard Henderson { 156709a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 156876f4a8aeSRichard Henderson INDEX_op_sssub_vec, INDEX_op_sub_vec, 0 156909a52d85SRichard Henderson }; 157009a52d85SRichard Henderson static const GVecGen4 ops[4] = { 157109a52d85SRichard Henderson { .fniv = gen_sqsub_vec, 157209a52d85SRichard Henderson .fno = gen_helper_gvec_sqsub_b, 157309a52d85SRichard Henderson .opt_opc = vecop_list, 157409a52d85SRichard Henderson .write_aofs = true, 157509a52d85SRichard Henderson .vece = MO_8 }, 157609a52d85SRichard Henderson { .fniv = gen_sqsub_vec, 157709a52d85SRichard Henderson .fno = gen_helper_gvec_sqsub_h, 157809a52d85SRichard Henderson .opt_opc = vecop_list, 157909a52d85SRichard Henderson .write_aofs = true, 158009a52d85SRichard Henderson .vece = MO_16 }, 158109a52d85SRichard Henderson { .fniv = gen_sqsub_vec, 158209a52d85SRichard Henderson .fno = gen_helper_gvec_sqsub_s, 158309a52d85SRichard Henderson .opt_opc = vecop_list, 158409a52d85SRichard Henderson .write_aofs = true, 158509a52d85SRichard Henderson .vece = MO_32 }, 158609a52d85SRichard Henderson { .fniv = gen_sqsub_vec, 1587f4fa83d6SRichard Henderson .fni8 = gen_sqsub_d, 158809a52d85SRichard Henderson .fno = gen_helper_gvec_sqsub_d, 158909a52d85SRichard Henderson .opt_opc = vecop_list, 159009a52d85SRichard Henderson .write_aofs = true, 159109a52d85SRichard Henderson .vece = MO_64 }, 159209a52d85SRichard Henderson }; 159301d5665bSRichard Henderson 159401d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 159509a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 159609a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 159709a52d85SRichard Henderson } 159809a52d85SRichard Henderson 159909a52d85SRichard Henderson static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 160009a52d85SRichard Henderson { 160109a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 160209a52d85SRichard Henderson 160309a52d85SRichard Henderson tcg_gen_sub_i32(t, a, b); 160409a52d85SRichard Henderson tcg_gen_sub_i32(d, b, a); 160509a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t); 160609a52d85SRichard Henderson } 160709a52d85SRichard Henderson 160809a52d85SRichard Henderson static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 160909a52d85SRichard Henderson { 161009a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 161109a52d85SRichard Henderson 161209a52d85SRichard Henderson tcg_gen_sub_i64(t, a, b); 161309a52d85SRichard Henderson tcg_gen_sub_i64(d, b, a); 161409a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t); 161509a52d85SRichard Henderson } 161609a52d85SRichard Henderson 161709a52d85SRichard Henderson static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 161809a52d85SRichard Henderson { 161909a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 162009a52d85SRichard Henderson 162109a52d85SRichard Henderson tcg_gen_smin_vec(vece, t, a, b); 162209a52d85SRichard Henderson tcg_gen_smax_vec(vece, d, a, b); 162309a52d85SRichard Henderson tcg_gen_sub_vec(vece, d, d, t); 162409a52d85SRichard Henderson } 162509a52d85SRichard Henderson 162609a52d85SRichard Henderson void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 162709a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 162809a52d85SRichard Henderson { 162909a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 163009a52d85SRichard Henderson INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 163109a52d85SRichard Henderson }; 163209a52d85SRichard Henderson static const GVecGen3 ops[4] = { 163309a52d85SRichard Henderson { .fniv = gen_sabd_vec, 163409a52d85SRichard Henderson .fno = gen_helper_gvec_sabd_b, 163509a52d85SRichard Henderson .opt_opc = vecop_list, 163609a52d85SRichard Henderson .vece = MO_8 }, 163709a52d85SRichard Henderson { .fniv = gen_sabd_vec, 163809a52d85SRichard Henderson .fno = gen_helper_gvec_sabd_h, 163909a52d85SRichard Henderson .opt_opc = vecop_list, 164009a52d85SRichard Henderson .vece = MO_16 }, 164109a52d85SRichard Henderson { .fni4 = gen_sabd_i32, 164209a52d85SRichard Henderson .fniv = gen_sabd_vec, 164309a52d85SRichard Henderson .fno = gen_helper_gvec_sabd_s, 164409a52d85SRichard Henderson .opt_opc = vecop_list, 164509a52d85SRichard Henderson .vece = MO_32 }, 164609a52d85SRichard Henderson { .fni8 = gen_sabd_i64, 164709a52d85SRichard Henderson .fniv = gen_sabd_vec, 164809a52d85SRichard Henderson .fno = gen_helper_gvec_sabd_d, 164909a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 165009a52d85SRichard Henderson .opt_opc = vecop_list, 165109a52d85SRichard Henderson .vece = MO_64 }, 165209a52d85SRichard Henderson }; 165309a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 165409a52d85SRichard Henderson } 165509a52d85SRichard Henderson 165609a52d85SRichard Henderson static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 165709a52d85SRichard Henderson { 165809a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 165909a52d85SRichard Henderson 166009a52d85SRichard Henderson tcg_gen_sub_i32(t, a, b); 166109a52d85SRichard Henderson tcg_gen_sub_i32(d, b, a); 166209a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t); 166309a52d85SRichard Henderson } 166409a52d85SRichard Henderson 166509a52d85SRichard Henderson static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 166609a52d85SRichard Henderson { 166709a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 166809a52d85SRichard Henderson 166909a52d85SRichard Henderson tcg_gen_sub_i64(t, a, b); 167009a52d85SRichard Henderson tcg_gen_sub_i64(d, b, a); 167109a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t); 167209a52d85SRichard Henderson } 167309a52d85SRichard Henderson 167409a52d85SRichard Henderson static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 167509a52d85SRichard Henderson { 167609a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 167709a52d85SRichard Henderson 167809a52d85SRichard Henderson tcg_gen_umin_vec(vece, t, a, b); 167909a52d85SRichard Henderson tcg_gen_umax_vec(vece, d, a, b); 168009a52d85SRichard Henderson tcg_gen_sub_vec(vece, d, d, t); 168109a52d85SRichard Henderson } 168209a52d85SRichard Henderson 168309a52d85SRichard Henderson void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 168409a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 168509a52d85SRichard Henderson { 168609a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 168709a52d85SRichard Henderson INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0 168809a52d85SRichard Henderson }; 168909a52d85SRichard Henderson static const GVecGen3 ops[4] = { 169009a52d85SRichard Henderson { .fniv = gen_uabd_vec, 169109a52d85SRichard Henderson .fno = gen_helper_gvec_uabd_b, 169209a52d85SRichard Henderson .opt_opc = vecop_list, 169309a52d85SRichard Henderson .vece = MO_8 }, 169409a52d85SRichard Henderson { .fniv = gen_uabd_vec, 169509a52d85SRichard Henderson .fno = gen_helper_gvec_uabd_h, 169609a52d85SRichard Henderson .opt_opc = vecop_list, 169709a52d85SRichard Henderson .vece = MO_16 }, 169809a52d85SRichard Henderson { .fni4 = gen_uabd_i32, 169909a52d85SRichard Henderson .fniv = gen_uabd_vec, 170009a52d85SRichard Henderson .fno = gen_helper_gvec_uabd_s, 170109a52d85SRichard Henderson .opt_opc = vecop_list, 170209a52d85SRichard Henderson .vece = MO_32 }, 170309a52d85SRichard Henderson { .fni8 = gen_uabd_i64, 170409a52d85SRichard Henderson .fniv = gen_uabd_vec, 170509a52d85SRichard Henderson .fno = gen_helper_gvec_uabd_d, 170609a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 170709a52d85SRichard Henderson .opt_opc = vecop_list, 170809a52d85SRichard Henderson .vece = MO_64 }, 170909a52d85SRichard Henderson }; 171009a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 171109a52d85SRichard Henderson } 171209a52d85SRichard Henderson 171309a52d85SRichard Henderson static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 171409a52d85SRichard Henderson { 171509a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 171609a52d85SRichard Henderson gen_sabd_i32(t, a, b); 171709a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 171809a52d85SRichard Henderson } 171909a52d85SRichard Henderson 172009a52d85SRichard Henderson static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 172109a52d85SRichard Henderson { 172209a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 172309a52d85SRichard Henderson gen_sabd_i64(t, a, b); 172409a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 172509a52d85SRichard Henderson } 172609a52d85SRichard Henderson 172709a52d85SRichard Henderson static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 172809a52d85SRichard Henderson { 172909a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 173009a52d85SRichard Henderson gen_sabd_vec(vece, t, a, b); 173109a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 173209a52d85SRichard Henderson } 173309a52d85SRichard Henderson 173409a52d85SRichard Henderson void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 173509a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 173609a52d85SRichard Henderson { 173709a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 173809a52d85SRichard Henderson INDEX_op_sub_vec, INDEX_op_add_vec, 173909a52d85SRichard Henderson INDEX_op_smin_vec, INDEX_op_smax_vec, 0 174009a52d85SRichard Henderson }; 174109a52d85SRichard Henderson static const GVecGen3 ops[4] = { 174209a52d85SRichard Henderson { .fniv = gen_saba_vec, 174309a52d85SRichard Henderson .fno = gen_helper_gvec_saba_b, 174409a52d85SRichard Henderson .opt_opc = vecop_list, 174509a52d85SRichard Henderson .load_dest = true, 174609a52d85SRichard Henderson .vece = MO_8 }, 174709a52d85SRichard Henderson { .fniv = gen_saba_vec, 174809a52d85SRichard Henderson .fno = gen_helper_gvec_saba_h, 174909a52d85SRichard Henderson .opt_opc = vecop_list, 175009a52d85SRichard Henderson .load_dest = true, 175109a52d85SRichard Henderson .vece = MO_16 }, 175209a52d85SRichard Henderson { .fni4 = gen_saba_i32, 175309a52d85SRichard Henderson .fniv = gen_saba_vec, 175409a52d85SRichard Henderson .fno = gen_helper_gvec_saba_s, 175509a52d85SRichard Henderson .opt_opc = vecop_list, 175609a52d85SRichard Henderson .load_dest = true, 175709a52d85SRichard Henderson .vece = MO_32 }, 175809a52d85SRichard Henderson { .fni8 = gen_saba_i64, 175909a52d85SRichard Henderson .fniv = gen_saba_vec, 176009a52d85SRichard Henderson .fno = gen_helper_gvec_saba_d, 176109a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 176209a52d85SRichard Henderson .opt_opc = vecop_list, 176309a52d85SRichard Henderson .load_dest = true, 176409a52d85SRichard Henderson .vece = MO_64 }, 176509a52d85SRichard Henderson }; 176609a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 176709a52d85SRichard Henderson } 176809a52d85SRichard Henderson 176909a52d85SRichard Henderson static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 177009a52d85SRichard Henderson { 177109a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 177209a52d85SRichard Henderson gen_uabd_i32(t, a, b); 177309a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 177409a52d85SRichard Henderson } 177509a52d85SRichard Henderson 177609a52d85SRichard Henderson static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 177709a52d85SRichard Henderson { 177809a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 177909a52d85SRichard Henderson gen_uabd_i64(t, a, b); 178009a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 178109a52d85SRichard Henderson } 178209a52d85SRichard Henderson 178309a52d85SRichard Henderson static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 178409a52d85SRichard Henderson { 178509a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 178609a52d85SRichard Henderson gen_uabd_vec(vece, t, a, b); 178709a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 178809a52d85SRichard Henderson } 178909a52d85SRichard Henderson 179009a52d85SRichard Henderson void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 179109a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 179209a52d85SRichard Henderson { 179309a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 179409a52d85SRichard Henderson INDEX_op_sub_vec, INDEX_op_add_vec, 179509a52d85SRichard Henderson INDEX_op_umin_vec, INDEX_op_umax_vec, 0 179609a52d85SRichard Henderson }; 179709a52d85SRichard Henderson static const GVecGen3 ops[4] = { 179809a52d85SRichard Henderson { .fniv = gen_uaba_vec, 179909a52d85SRichard Henderson .fno = gen_helper_gvec_uaba_b, 180009a52d85SRichard Henderson .opt_opc = vecop_list, 180109a52d85SRichard Henderson .load_dest = true, 180209a52d85SRichard Henderson .vece = MO_8 }, 180309a52d85SRichard Henderson { .fniv = gen_uaba_vec, 180409a52d85SRichard Henderson .fno = gen_helper_gvec_uaba_h, 180509a52d85SRichard Henderson .opt_opc = vecop_list, 180609a52d85SRichard Henderson .load_dest = true, 180709a52d85SRichard Henderson .vece = MO_16 }, 180809a52d85SRichard Henderson { .fni4 = gen_uaba_i32, 180909a52d85SRichard Henderson .fniv = gen_uaba_vec, 181009a52d85SRichard Henderson .fno = gen_helper_gvec_uaba_s, 181109a52d85SRichard Henderson .opt_opc = vecop_list, 181209a52d85SRichard Henderson .load_dest = true, 181309a52d85SRichard Henderson .vece = MO_32 }, 181409a52d85SRichard Henderson { .fni8 = gen_uaba_i64, 181509a52d85SRichard Henderson .fniv = gen_uaba_vec, 181609a52d85SRichard Henderson .fno = gen_helper_gvec_uaba_d, 181709a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 181809a52d85SRichard Henderson .opt_opc = vecop_list, 181909a52d85SRichard Henderson .load_dest = true, 182009a52d85SRichard Henderson .vece = MO_64 }, 182109a52d85SRichard Henderson }; 182209a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 182309a52d85SRichard Henderson } 1824a7e4eec6SRichard Henderson 1825a7e4eec6SRichard Henderson void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1826a7e4eec6SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1827a7e4eec6SRichard Henderson { 1828a7e4eec6SRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 1829a7e4eec6SRichard Henderson gen_helper_gvec_addp_b, 1830a7e4eec6SRichard Henderson gen_helper_gvec_addp_h, 1831a7e4eec6SRichard Henderson gen_helper_gvec_addp_s, 1832a7e4eec6SRichard Henderson gen_helper_gvec_addp_d, 1833a7e4eec6SRichard Henderson }; 1834a7e4eec6SRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1835a7e4eec6SRichard Henderson } 183628b5451bSRichard Henderson 183728b5451bSRichard Henderson void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 183828b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 183928b5451bSRichard Henderson { 184028b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 184128b5451bSRichard Henderson gen_helper_gvec_smaxp_b, 184228b5451bSRichard Henderson gen_helper_gvec_smaxp_h, 184328b5451bSRichard Henderson gen_helper_gvec_smaxp_s, 184428b5451bSRichard Henderson }; 184528b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 184628b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 184728b5451bSRichard Henderson } 184828b5451bSRichard Henderson 184928b5451bSRichard Henderson void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 185028b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 185128b5451bSRichard Henderson { 185228b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 185328b5451bSRichard Henderson gen_helper_gvec_sminp_b, 185428b5451bSRichard Henderson gen_helper_gvec_sminp_h, 185528b5451bSRichard Henderson gen_helper_gvec_sminp_s, 185628b5451bSRichard Henderson }; 185728b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 185828b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 185928b5451bSRichard Henderson } 186028b5451bSRichard Henderson 186128b5451bSRichard Henderson void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 186228b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 186328b5451bSRichard Henderson { 186428b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 186528b5451bSRichard Henderson gen_helper_gvec_umaxp_b, 186628b5451bSRichard Henderson gen_helper_gvec_umaxp_h, 186728b5451bSRichard Henderson gen_helper_gvec_umaxp_s, 186828b5451bSRichard Henderson }; 186928b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 187028b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 187128b5451bSRichard Henderson } 187228b5451bSRichard Henderson 187328b5451bSRichard Henderson void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 187428b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 187528b5451bSRichard Henderson { 187628b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 187728b5451bSRichard Henderson gen_helper_gvec_uminp_b, 187828b5451bSRichard Henderson gen_helper_gvec_uminp_h, 187928b5451bSRichard Henderson gen_helper_gvec_uminp_s, 188028b5451bSRichard Henderson }; 188128b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 188228b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 188328b5451bSRichard Henderson } 1884203aca91SRichard Henderson 1885203aca91SRichard Henderson static void gen_shadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 1886203aca91SRichard Henderson { 1887203aca91SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 1888203aca91SRichard Henderson 1889203aca91SRichard Henderson tcg_gen_and_i64(t, a, b); 1890203aca91SRichard Henderson tcg_gen_vec_sar8i_i64(a, a, 1); 1891203aca91SRichard Henderson tcg_gen_vec_sar8i_i64(b, b, 1); 1892203aca91SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 1893203aca91SRichard Henderson tcg_gen_vec_add8_i64(d, a, b); 1894203aca91SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 1895203aca91SRichard Henderson } 1896203aca91SRichard Henderson 1897203aca91SRichard Henderson static void gen_shadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 1898203aca91SRichard Henderson { 1899203aca91SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 1900203aca91SRichard Henderson 1901203aca91SRichard Henderson tcg_gen_and_i64(t, a, b); 1902203aca91SRichard Henderson tcg_gen_vec_sar16i_i64(a, a, 1); 1903203aca91SRichard Henderson tcg_gen_vec_sar16i_i64(b, b, 1); 1904203aca91SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 1905203aca91SRichard Henderson tcg_gen_vec_add16_i64(d, a, b); 1906203aca91SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 1907203aca91SRichard Henderson } 1908203aca91SRichard Henderson 1909203aca91SRichard Henderson static void gen_shadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 1910203aca91SRichard Henderson { 1911203aca91SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 1912203aca91SRichard Henderson 1913203aca91SRichard Henderson tcg_gen_and_i32(t, a, b); 1914203aca91SRichard Henderson tcg_gen_sari_i32(a, a, 1); 1915203aca91SRichard Henderson tcg_gen_sari_i32(b, b, 1); 1916203aca91SRichard Henderson tcg_gen_andi_i32(t, t, 1); 1917203aca91SRichard Henderson tcg_gen_add_i32(d, a, b); 1918203aca91SRichard Henderson tcg_gen_add_i32(d, d, t); 1919203aca91SRichard Henderson } 1920203aca91SRichard Henderson 1921203aca91SRichard Henderson static void gen_shadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 1922203aca91SRichard Henderson { 1923203aca91SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 1924203aca91SRichard Henderson 1925203aca91SRichard Henderson tcg_gen_and_vec(vece, t, a, b); 1926203aca91SRichard Henderson tcg_gen_sari_vec(vece, a, a, 1); 1927203aca91SRichard Henderson tcg_gen_sari_vec(vece, b, b, 1); 1928203aca91SRichard Henderson tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); 1929203aca91SRichard Henderson tcg_gen_add_vec(vece, d, a, b); 1930203aca91SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 1931203aca91SRichard Henderson } 1932203aca91SRichard Henderson 1933203aca91SRichard Henderson void gen_gvec_shadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1934203aca91SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1935203aca91SRichard Henderson { 1936203aca91SRichard Henderson static const TCGOpcode vecop_list[] = { 1937203aca91SRichard Henderson INDEX_op_sari_vec, INDEX_op_add_vec, 0 1938203aca91SRichard Henderson }; 1939203aca91SRichard Henderson static const GVecGen3 g[] = { 1940203aca91SRichard Henderson { .fni8 = gen_shadd8_i64, 1941203aca91SRichard Henderson .fniv = gen_shadd_vec, 1942203aca91SRichard Henderson .opt_opc = vecop_list, 1943203aca91SRichard Henderson .vece = MO_8 }, 1944203aca91SRichard Henderson { .fni8 = gen_shadd16_i64, 1945203aca91SRichard Henderson .fniv = gen_shadd_vec, 1946203aca91SRichard Henderson .opt_opc = vecop_list, 1947203aca91SRichard Henderson .vece = MO_16 }, 1948203aca91SRichard Henderson { .fni4 = gen_shadd_i32, 1949203aca91SRichard Henderson .fniv = gen_shadd_vec, 1950203aca91SRichard Henderson .opt_opc = vecop_list, 1951203aca91SRichard Henderson .vece = MO_32 }, 1952203aca91SRichard Henderson }; 1953203aca91SRichard Henderson tcg_debug_assert(vece <= MO_32); 1954203aca91SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); 1955203aca91SRichard Henderson } 1956203aca91SRichard Henderson 1957203aca91SRichard Henderson static void gen_uhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 1958203aca91SRichard Henderson { 1959203aca91SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 1960203aca91SRichard Henderson 1961203aca91SRichard Henderson tcg_gen_and_i64(t, a, b); 1962203aca91SRichard Henderson tcg_gen_vec_shr8i_i64(a, a, 1); 1963203aca91SRichard Henderson tcg_gen_vec_shr8i_i64(b, b, 1); 1964203aca91SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 1965203aca91SRichard Henderson tcg_gen_vec_add8_i64(d, a, b); 1966203aca91SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 1967203aca91SRichard Henderson } 1968203aca91SRichard Henderson 1969203aca91SRichard Henderson static void gen_uhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 1970203aca91SRichard Henderson { 1971203aca91SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 1972203aca91SRichard Henderson 1973203aca91SRichard Henderson tcg_gen_and_i64(t, a, b); 1974203aca91SRichard Henderson tcg_gen_vec_shr16i_i64(a, a, 1); 1975203aca91SRichard Henderson tcg_gen_vec_shr16i_i64(b, b, 1); 1976203aca91SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 1977203aca91SRichard Henderson tcg_gen_vec_add16_i64(d, a, b); 1978203aca91SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 1979203aca91SRichard Henderson } 1980203aca91SRichard Henderson 1981203aca91SRichard Henderson static void gen_uhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 1982203aca91SRichard Henderson { 1983203aca91SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 1984203aca91SRichard Henderson 1985203aca91SRichard Henderson tcg_gen_and_i32(t, a, b); 1986203aca91SRichard Henderson tcg_gen_shri_i32(a, a, 1); 1987203aca91SRichard Henderson tcg_gen_shri_i32(b, b, 1); 1988203aca91SRichard Henderson tcg_gen_andi_i32(t, t, 1); 1989203aca91SRichard Henderson tcg_gen_add_i32(d, a, b); 1990203aca91SRichard Henderson tcg_gen_add_i32(d, d, t); 1991203aca91SRichard Henderson } 1992203aca91SRichard Henderson 1993203aca91SRichard Henderson static void gen_uhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 1994203aca91SRichard Henderson { 1995203aca91SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 1996203aca91SRichard Henderson 1997203aca91SRichard Henderson tcg_gen_and_vec(vece, t, a, b); 1998203aca91SRichard Henderson tcg_gen_shri_vec(vece, a, a, 1); 1999203aca91SRichard Henderson tcg_gen_shri_vec(vece, b, b, 1); 2000203aca91SRichard Henderson tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); 2001203aca91SRichard Henderson tcg_gen_add_vec(vece, d, a, b); 2002203aca91SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 2003203aca91SRichard Henderson } 2004203aca91SRichard Henderson 2005203aca91SRichard Henderson void gen_gvec_uhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 2006203aca91SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 2007203aca91SRichard Henderson { 2008203aca91SRichard Henderson static const TCGOpcode vecop_list[] = { 2009203aca91SRichard Henderson INDEX_op_shri_vec, INDEX_op_add_vec, 0 2010203aca91SRichard Henderson }; 2011203aca91SRichard Henderson static const GVecGen3 g[] = { 2012203aca91SRichard Henderson { .fni8 = gen_uhadd8_i64, 2013203aca91SRichard Henderson .fniv = gen_uhadd_vec, 2014203aca91SRichard Henderson .opt_opc = vecop_list, 2015203aca91SRichard Henderson .vece = MO_8 }, 2016203aca91SRichard Henderson { .fni8 = gen_uhadd16_i64, 2017203aca91SRichard Henderson .fniv = gen_uhadd_vec, 2018203aca91SRichard Henderson .opt_opc = vecop_list, 2019203aca91SRichard Henderson .vece = MO_16 }, 2020203aca91SRichard Henderson { .fni4 = gen_uhadd_i32, 2021203aca91SRichard Henderson .fniv = gen_uhadd_vec, 2022203aca91SRichard Henderson .opt_opc = vecop_list, 2023203aca91SRichard Henderson .vece = MO_32 }, 2024203aca91SRichard Henderson }; 2025203aca91SRichard Henderson tcg_debug_assert(vece <= MO_32); 2026203aca91SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); 2027203aca91SRichard Henderson } 202834c0d865SRichard Henderson 202934c0d865SRichard Henderson static void gen_shsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 203034c0d865SRichard Henderson { 203134c0d865SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 203234c0d865SRichard Henderson 203334c0d865SRichard Henderson tcg_gen_andc_i64(t, b, a); 203434c0d865SRichard Henderson tcg_gen_vec_sar8i_i64(a, a, 1); 203534c0d865SRichard Henderson tcg_gen_vec_sar8i_i64(b, b, 1); 203634c0d865SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 203734c0d865SRichard Henderson tcg_gen_vec_sub8_i64(d, a, b); 203834c0d865SRichard Henderson tcg_gen_vec_sub8_i64(d, d, t); 203934c0d865SRichard Henderson } 204034c0d865SRichard Henderson 204134c0d865SRichard Henderson static void gen_shsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 204234c0d865SRichard Henderson { 204334c0d865SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 204434c0d865SRichard Henderson 204534c0d865SRichard Henderson tcg_gen_andc_i64(t, b, a); 204634c0d865SRichard Henderson tcg_gen_vec_sar16i_i64(a, a, 1); 204734c0d865SRichard Henderson tcg_gen_vec_sar16i_i64(b, b, 1); 204834c0d865SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 204934c0d865SRichard Henderson tcg_gen_vec_sub16_i64(d, a, b); 205034c0d865SRichard Henderson tcg_gen_vec_sub16_i64(d, d, t); 205134c0d865SRichard Henderson } 205234c0d865SRichard Henderson 205334c0d865SRichard Henderson static void gen_shsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 205434c0d865SRichard Henderson { 205534c0d865SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 205634c0d865SRichard Henderson 205734c0d865SRichard Henderson tcg_gen_andc_i32(t, b, a); 205834c0d865SRichard Henderson tcg_gen_sari_i32(a, a, 1); 205934c0d865SRichard Henderson tcg_gen_sari_i32(b, b, 1); 206034c0d865SRichard Henderson tcg_gen_andi_i32(t, t, 1); 206134c0d865SRichard Henderson tcg_gen_sub_i32(d, a, b); 206234c0d865SRichard Henderson tcg_gen_sub_i32(d, d, t); 206334c0d865SRichard Henderson } 206434c0d865SRichard Henderson 206534c0d865SRichard Henderson static void gen_shsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 206634c0d865SRichard Henderson { 206734c0d865SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 206834c0d865SRichard Henderson 206934c0d865SRichard Henderson tcg_gen_andc_vec(vece, t, b, a); 207034c0d865SRichard Henderson tcg_gen_sari_vec(vece, a, a, 1); 207134c0d865SRichard Henderson tcg_gen_sari_vec(vece, b, b, 1); 207234c0d865SRichard Henderson tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); 207334c0d865SRichard Henderson tcg_gen_sub_vec(vece, d, a, b); 207434c0d865SRichard Henderson tcg_gen_sub_vec(vece, d, d, t); 207534c0d865SRichard Henderson } 207634c0d865SRichard Henderson 207734c0d865SRichard Henderson void gen_gvec_shsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 207834c0d865SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 207934c0d865SRichard Henderson { 208034c0d865SRichard Henderson static const TCGOpcode vecop_list[] = { 208134c0d865SRichard Henderson INDEX_op_sari_vec, INDEX_op_sub_vec, 0 208234c0d865SRichard Henderson }; 208334c0d865SRichard Henderson static const GVecGen3 g[4] = { 208434c0d865SRichard Henderson { .fni8 = gen_shsub8_i64, 208534c0d865SRichard Henderson .fniv = gen_shsub_vec, 208634c0d865SRichard Henderson .opt_opc = vecop_list, 208734c0d865SRichard Henderson .vece = MO_8 }, 208834c0d865SRichard Henderson { .fni8 = gen_shsub16_i64, 208934c0d865SRichard Henderson .fniv = gen_shsub_vec, 209034c0d865SRichard Henderson .opt_opc = vecop_list, 209134c0d865SRichard Henderson .vece = MO_16 }, 209234c0d865SRichard Henderson { .fni4 = gen_shsub_i32, 209334c0d865SRichard Henderson .fniv = gen_shsub_vec, 209434c0d865SRichard Henderson .opt_opc = vecop_list, 209534c0d865SRichard Henderson .vece = MO_32 }, 209634c0d865SRichard Henderson }; 209734c0d865SRichard Henderson assert(vece <= MO_32); 209834c0d865SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); 209934c0d865SRichard Henderson } 210034c0d865SRichard Henderson 210134c0d865SRichard Henderson static void gen_uhsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 210234c0d865SRichard Henderson { 210334c0d865SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 210434c0d865SRichard Henderson 210534c0d865SRichard Henderson tcg_gen_andc_i64(t, b, a); 210634c0d865SRichard Henderson tcg_gen_vec_shr8i_i64(a, a, 1); 210734c0d865SRichard Henderson tcg_gen_vec_shr8i_i64(b, b, 1); 210834c0d865SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 210934c0d865SRichard Henderson tcg_gen_vec_sub8_i64(d, a, b); 211034c0d865SRichard Henderson tcg_gen_vec_sub8_i64(d, d, t); 211134c0d865SRichard Henderson } 211234c0d865SRichard Henderson 211334c0d865SRichard Henderson static void gen_uhsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 211434c0d865SRichard Henderson { 211534c0d865SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 211634c0d865SRichard Henderson 211734c0d865SRichard Henderson tcg_gen_andc_i64(t, b, a); 211834c0d865SRichard Henderson tcg_gen_vec_shr16i_i64(a, a, 1); 211934c0d865SRichard Henderson tcg_gen_vec_shr16i_i64(b, b, 1); 212034c0d865SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 212134c0d865SRichard Henderson tcg_gen_vec_sub16_i64(d, a, b); 212234c0d865SRichard Henderson tcg_gen_vec_sub16_i64(d, d, t); 212334c0d865SRichard Henderson } 212434c0d865SRichard Henderson 212534c0d865SRichard Henderson static void gen_uhsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 212634c0d865SRichard Henderson { 212734c0d865SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 212834c0d865SRichard Henderson 212934c0d865SRichard Henderson tcg_gen_andc_i32(t, b, a); 213034c0d865SRichard Henderson tcg_gen_shri_i32(a, a, 1); 213134c0d865SRichard Henderson tcg_gen_shri_i32(b, b, 1); 213234c0d865SRichard Henderson tcg_gen_andi_i32(t, t, 1); 213334c0d865SRichard Henderson tcg_gen_sub_i32(d, a, b); 213434c0d865SRichard Henderson tcg_gen_sub_i32(d, d, t); 213534c0d865SRichard Henderson } 213634c0d865SRichard Henderson 213734c0d865SRichard Henderson static void gen_uhsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 213834c0d865SRichard Henderson { 213934c0d865SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 214034c0d865SRichard Henderson 214134c0d865SRichard Henderson tcg_gen_andc_vec(vece, t, b, a); 214234c0d865SRichard Henderson tcg_gen_shri_vec(vece, a, a, 1); 214334c0d865SRichard Henderson tcg_gen_shri_vec(vece, b, b, 1); 214434c0d865SRichard Henderson tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); 214534c0d865SRichard Henderson tcg_gen_sub_vec(vece, d, a, b); 214634c0d865SRichard Henderson tcg_gen_sub_vec(vece, d, d, t); 214734c0d865SRichard Henderson } 214834c0d865SRichard Henderson 214934c0d865SRichard Henderson void gen_gvec_uhsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 215034c0d865SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 215134c0d865SRichard Henderson { 215234c0d865SRichard Henderson static const TCGOpcode vecop_list[] = { 215334c0d865SRichard Henderson INDEX_op_shri_vec, INDEX_op_sub_vec, 0 215434c0d865SRichard Henderson }; 215534c0d865SRichard Henderson static const GVecGen3 g[4] = { 215634c0d865SRichard Henderson { .fni8 = gen_uhsub8_i64, 215734c0d865SRichard Henderson .fniv = gen_uhsub_vec, 215834c0d865SRichard Henderson .opt_opc = vecop_list, 215934c0d865SRichard Henderson .vece = MO_8 }, 216034c0d865SRichard Henderson { .fni8 = gen_uhsub16_i64, 216134c0d865SRichard Henderson .fniv = gen_uhsub_vec, 216234c0d865SRichard Henderson .opt_opc = vecop_list, 216334c0d865SRichard Henderson .vece = MO_16 }, 216434c0d865SRichard Henderson { .fni4 = gen_uhsub_i32, 216534c0d865SRichard Henderson .fniv = gen_uhsub_vec, 216634c0d865SRichard Henderson .opt_opc = vecop_list, 216734c0d865SRichard Henderson .vece = MO_32 }, 216834c0d865SRichard Henderson }; 216934c0d865SRichard Henderson assert(vece <= MO_32); 217034c0d865SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); 217134c0d865SRichard Henderson } 21728989b95eSRichard Henderson 21738989b95eSRichard Henderson static void gen_srhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 21748989b95eSRichard Henderson { 21758989b95eSRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 21768989b95eSRichard Henderson 21778989b95eSRichard Henderson tcg_gen_or_i64(t, a, b); 21788989b95eSRichard Henderson tcg_gen_vec_sar8i_i64(a, a, 1); 21798989b95eSRichard Henderson tcg_gen_vec_sar8i_i64(b, b, 1); 21808989b95eSRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 21818989b95eSRichard Henderson tcg_gen_vec_add8_i64(d, a, b); 21828989b95eSRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 21838989b95eSRichard Henderson } 21848989b95eSRichard Henderson 21858989b95eSRichard Henderson static void gen_srhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 21868989b95eSRichard Henderson { 21878989b95eSRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 21888989b95eSRichard Henderson 21898989b95eSRichard Henderson tcg_gen_or_i64(t, a, b); 21908989b95eSRichard Henderson tcg_gen_vec_sar16i_i64(a, a, 1); 21918989b95eSRichard Henderson tcg_gen_vec_sar16i_i64(b, b, 1); 21928989b95eSRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 21938989b95eSRichard Henderson tcg_gen_vec_add16_i64(d, a, b); 21948989b95eSRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 21958989b95eSRichard Henderson } 21968989b95eSRichard Henderson 21978989b95eSRichard Henderson static void gen_srhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 21988989b95eSRichard Henderson { 21998989b95eSRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 22008989b95eSRichard Henderson 22018989b95eSRichard Henderson tcg_gen_or_i32(t, a, b); 22028989b95eSRichard Henderson tcg_gen_sari_i32(a, a, 1); 22038989b95eSRichard Henderson tcg_gen_sari_i32(b, b, 1); 22048989b95eSRichard Henderson tcg_gen_andi_i32(t, t, 1); 22058989b95eSRichard Henderson tcg_gen_add_i32(d, a, b); 22068989b95eSRichard Henderson tcg_gen_add_i32(d, d, t); 22078989b95eSRichard Henderson } 22088989b95eSRichard Henderson 22098989b95eSRichard Henderson static void gen_srhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 22108989b95eSRichard Henderson { 22118989b95eSRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 22128989b95eSRichard Henderson 22138989b95eSRichard Henderson tcg_gen_or_vec(vece, t, a, b); 22148989b95eSRichard Henderson tcg_gen_sari_vec(vece, a, a, 1); 22158989b95eSRichard Henderson tcg_gen_sari_vec(vece, b, b, 1); 22168989b95eSRichard Henderson tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); 22178989b95eSRichard Henderson tcg_gen_add_vec(vece, d, a, b); 22188989b95eSRichard Henderson tcg_gen_add_vec(vece, d, d, t); 22198989b95eSRichard Henderson } 22208989b95eSRichard Henderson 22218989b95eSRichard Henderson void gen_gvec_srhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 22228989b95eSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 22238989b95eSRichard Henderson { 22248989b95eSRichard Henderson static const TCGOpcode vecop_list[] = { 22258989b95eSRichard Henderson INDEX_op_sari_vec, INDEX_op_add_vec, 0 22268989b95eSRichard Henderson }; 22278989b95eSRichard Henderson static const GVecGen3 g[] = { 22288989b95eSRichard Henderson { .fni8 = gen_srhadd8_i64, 22298989b95eSRichard Henderson .fniv = gen_srhadd_vec, 22308989b95eSRichard Henderson .opt_opc = vecop_list, 22318989b95eSRichard Henderson .vece = MO_8 }, 22328989b95eSRichard Henderson { .fni8 = gen_srhadd16_i64, 22338989b95eSRichard Henderson .fniv = gen_srhadd_vec, 22348989b95eSRichard Henderson .opt_opc = vecop_list, 22358989b95eSRichard Henderson .vece = MO_16 }, 22368989b95eSRichard Henderson { .fni4 = gen_srhadd_i32, 22378989b95eSRichard Henderson .fniv = gen_srhadd_vec, 22388989b95eSRichard Henderson .opt_opc = vecop_list, 22398989b95eSRichard Henderson .vece = MO_32 }, 22408989b95eSRichard Henderson }; 22418989b95eSRichard Henderson assert(vece <= MO_32); 22428989b95eSRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); 22438989b95eSRichard Henderson } 22448989b95eSRichard Henderson 22458989b95eSRichard Henderson static void gen_urhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 22468989b95eSRichard Henderson { 22478989b95eSRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 22488989b95eSRichard Henderson 22498989b95eSRichard Henderson tcg_gen_or_i64(t, a, b); 22508989b95eSRichard Henderson tcg_gen_vec_shr8i_i64(a, a, 1); 22518989b95eSRichard Henderson tcg_gen_vec_shr8i_i64(b, b, 1); 22528989b95eSRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 22538989b95eSRichard Henderson tcg_gen_vec_add8_i64(d, a, b); 22548989b95eSRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 22558989b95eSRichard Henderson } 22568989b95eSRichard Henderson 22578989b95eSRichard Henderson static void gen_urhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 22588989b95eSRichard Henderson { 22598989b95eSRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 22608989b95eSRichard Henderson 22618989b95eSRichard Henderson tcg_gen_or_i64(t, a, b); 22628989b95eSRichard Henderson tcg_gen_vec_shr16i_i64(a, a, 1); 22638989b95eSRichard Henderson tcg_gen_vec_shr16i_i64(b, b, 1); 22648989b95eSRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 22658989b95eSRichard Henderson tcg_gen_vec_add16_i64(d, a, b); 22668989b95eSRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 22678989b95eSRichard Henderson } 22688989b95eSRichard Henderson 22698989b95eSRichard Henderson static void gen_urhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 22708989b95eSRichard Henderson { 22718989b95eSRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 22728989b95eSRichard Henderson 22738989b95eSRichard Henderson tcg_gen_or_i32(t, a, b); 22748989b95eSRichard Henderson tcg_gen_shri_i32(a, a, 1); 22758989b95eSRichard Henderson tcg_gen_shri_i32(b, b, 1); 22768989b95eSRichard Henderson tcg_gen_andi_i32(t, t, 1); 22778989b95eSRichard Henderson tcg_gen_add_i32(d, a, b); 22788989b95eSRichard Henderson tcg_gen_add_i32(d, d, t); 22798989b95eSRichard Henderson } 22808989b95eSRichard Henderson 22818989b95eSRichard Henderson static void gen_urhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 22828989b95eSRichard Henderson { 22838989b95eSRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 22848989b95eSRichard Henderson 22858989b95eSRichard Henderson tcg_gen_or_vec(vece, t, a, b); 22868989b95eSRichard Henderson tcg_gen_shri_vec(vece, a, a, 1); 22878989b95eSRichard Henderson tcg_gen_shri_vec(vece, b, b, 1); 22888989b95eSRichard Henderson tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); 22898989b95eSRichard Henderson tcg_gen_add_vec(vece, d, a, b); 22908989b95eSRichard Henderson tcg_gen_add_vec(vece, d, d, t); 22918989b95eSRichard Henderson } 22928989b95eSRichard Henderson 22938989b95eSRichard Henderson void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 22948989b95eSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 22958989b95eSRichard Henderson { 22968989b95eSRichard Henderson static const TCGOpcode vecop_list[] = { 22978989b95eSRichard Henderson INDEX_op_shri_vec, INDEX_op_add_vec, 0 22988989b95eSRichard Henderson }; 22998989b95eSRichard Henderson static const GVecGen3 g[] = { 23008989b95eSRichard Henderson { .fni8 = gen_urhadd8_i64, 23018989b95eSRichard Henderson .fniv = gen_urhadd_vec, 23028989b95eSRichard Henderson .opt_opc = vecop_list, 23038989b95eSRichard Henderson .vece = MO_8 }, 23048989b95eSRichard Henderson { .fni8 = gen_urhadd16_i64, 23058989b95eSRichard Henderson .fniv = gen_urhadd_vec, 23068989b95eSRichard Henderson .opt_opc = vecop_list, 23078989b95eSRichard Henderson .vece = MO_16 }, 23088989b95eSRichard Henderson { .fni4 = gen_urhadd_i32, 23098989b95eSRichard Henderson .fniv = gen_urhadd_vec, 23108989b95eSRichard Henderson .opt_opc = vecop_list, 23118989b95eSRichard Henderson .vece = MO_32 }, 23128989b95eSRichard Henderson }; 23138989b95eSRichard Henderson assert(vece <= MO_32); 23148989b95eSRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); 23158989b95eSRichard Henderson } 2316