109a52d85SRichard Henderson /* 209a52d85SRichard Henderson * ARM generic vector expansion 309a52d85SRichard Henderson * 409a52d85SRichard Henderson * Copyright (c) 2003 Fabrice Bellard 509a52d85SRichard Henderson * Copyright (c) 2005-2007 CodeSourcery 609a52d85SRichard Henderson * Copyright (c) 2007 OpenedHand, Ltd. 709a52d85SRichard Henderson * 809a52d85SRichard Henderson * This library is free software; you can redistribute it and/or 909a52d85SRichard Henderson * modify it under the terms of the GNU Lesser General Public 1009a52d85SRichard Henderson * License as published by the Free Software Foundation; either 1109a52d85SRichard Henderson * version 2.1 of the License, or (at your option) any later version. 1209a52d85SRichard Henderson * 1309a52d85SRichard Henderson * This library is distributed in the hope that it will be useful, 1409a52d85SRichard Henderson * but WITHOUT ANY WARRANTY; without even the implied warranty of 1509a52d85SRichard Henderson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1609a52d85SRichard Henderson * Lesser General Public License for more details. 1709a52d85SRichard Henderson * 1809a52d85SRichard Henderson * You should have received a copy of the GNU Lesser General Public 1909a52d85SRichard Henderson * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
2009a52d85SRichard Henderson */ 2109a52d85SRichard Henderson 2209a52d85SRichard Henderson #include "qemu/osdep.h" 2309a52d85SRichard Henderson #include "translate.h" 2409a52d85SRichard Henderson 2509a52d85SRichard Henderson 2609a52d85SRichard Henderson static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, 2709a52d85SRichard Henderson uint32_t opr_sz, uint32_t max_sz, 2809a52d85SRichard Henderson gen_helper_gvec_3_ptr *fn) 2909a52d85SRichard Henderson { 3009a52d85SRichard Henderson TCGv_ptr qc_ptr = tcg_temp_new_ptr(); 3109a52d85SRichard Henderson 3201d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 3309a52d85SRichard Henderson tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); 3409a52d85SRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr, 3509a52d85SRichard Henderson opr_sz, max_sz, 0, fn); 3609a52d85SRichard Henderson } 3709a52d85SRichard Henderson 388f81dcedSRichard Henderson void gen_gvec_sqdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 398f81dcedSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 408f81dcedSRichard Henderson { 418f81dcedSRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 428f81dcedSRichard Henderson gen_helper_neon_sqdmulh_h, gen_helper_neon_sqdmulh_s 438f81dcedSRichard Henderson }; 448f81dcedSRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 458f81dcedSRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 468f81dcedSRichard Henderson } 478f81dcedSRichard Henderson 488f81dcedSRichard Henderson void gen_gvec_sqrdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 498f81dcedSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 508f81dcedSRichard Henderson { 518f81dcedSRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 528f81dcedSRichard Henderson gen_helper_neon_sqrdmulh_h, gen_helper_neon_sqrdmulh_s 538f81dcedSRichard Henderson 
}; 548f81dcedSRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 558f81dcedSRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 568f81dcedSRichard Henderson } 578f81dcedSRichard Henderson 5809a52d85SRichard Henderson void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 5909a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 6009a52d85SRichard Henderson { 6109a52d85SRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 6209a52d85SRichard Henderson gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32 6309a52d85SRichard Henderson }; 6409a52d85SRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 6509a52d85SRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 6609a52d85SRichard Henderson } 6709a52d85SRichard Henderson 6809a52d85SRichard Henderson void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 6909a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 7009a52d85SRichard Henderson { 7109a52d85SRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 7209a52d85SRichard Henderson gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32 7309a52d85SRichard Henderson }; 7409a52d85SRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 7509a52d85SRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 7609a52d85SRichard Henderson } 7709a52d85SRichard Henderson 7809a52d85SRichard Henderson #define GEN_CMP0(NAME, COND) \ 7909a52d85SRichard Henderson void NAME(unsigned vece, uint32_t d, uint32_t m, \ 8009a52d85SRichard Henderson uint32_t opr_sz, uint32_t max_sz) \ 8109a52d85SRichard Henderson { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); } 8209a52d85SRichard Henderson 8309a52d85SRichard Henderson GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ) 8409a52d85SRichard Henderson GEN_CMP0(gen_gvec_cle0, TCG_COND_LE) 8509a52d85SRichard 
Henderson GEN_CMP0(gen_gvec_cge0, TCG_COND_GE) 8609a52d85SRichard Henderson GEN_CMP0(gen_gvec_clt0, TCG_COND_LT) 8709a52d85SRichard Henderson GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT) 8809a52d85SRichard Henderson 8909a52d85SRichard Henderson #undef GEN_CMP0 9009a52d85SRichard Henderson 9109a52d85SRichard Henderson static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 9209a52d85SRichard Henderson { 9309a52d85SRichard Henderson tcg_gen_vec_sar8i_i64(a, a, shift); 9409a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, a); 9509a52d85SRichard Henderson } 9609a52d85SRichard Henderson 9709a52d85SRichard Henderson static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 9809a52d85SRichard Henderson { 9909a52d85SRichard Henderson tcg_gen_vec_sar16i_i64(a, a, shift); 10009a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, a); 10109a52d85SRichard Henderson } 10209a52d85SRichard Henderson 10309a52d85SRichard Henderson static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 10409a52d85SRichard Henderson { 10509a52d85SRichard Henderson tcg_gen_sari_i32(a, a, shift); 10609a52d85SRichard Henderson tcg_gen_add_i32(d, d, a); 10709a52d85SRichard Henderson } 10809a52d85SRichard Henderson 10909a52d85SRichard Henderson static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 11009a52d85SRichard Henderson { 11109a52d85SRichard Henderson tcg_gen_sari_i64(a, a, shift); 11209a52d85SRichard Henderson tcg_gen_add_i64(d, d, a); 11309a52d85SRichard Henderson } 11409a52d85SRichard Henderson 11509a52d85SRichard Henderson static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 11609a52d85SRichard Henderson { 11709a52d85SRichard Henderson tcg_gen_sari_vec(vece, a, a, sh); 11809a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, a); 11909a52d85SRichard Henderson } 12009a52d85SRichard Henderson 12109a52d85SRichard Henderson void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 12209a52d85SRichard Henderson int64_t shift, uint32_t 
opr_sz, uint32_t max_sz) 12309a52d85SRichard Henderson { 12409a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 12509a52d85SRichard Henderson INDEX_op_sari_vec, INDEX_op_add_vec, 0 12609a52d85SRichard Henderson }; 12709a52d85SRichard Henderson static const GVecGen2i ops[4] = { 12809a52d85SRichard Henderson { .fni8 = gen_ssra8_i64, 12909a52d85SRichard Henderson .fniv = gen_ssra_vec, 13009a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_b, 13109a52d85SRichard Henderson .load_dest = true, 13209a52d85SRichard Henderson .opt_opc = vecop_list, 13309a52d85SRichard Henderson .vece = MO_8 }, 13409a52d85SRichard Henderson { .fni8 = gen_ssra16_i64, 13509a52d85SRichard Henderson .fniv = gen_ssra_vec, 13609a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_h, 13709a52d85SRichard Henderson .load_dest = true, 13809a52d85SRichard Henderson .opt_opc = vecop_list, 13909a52d85SRichard Henderson .vece = MO_16 }, 14009a52d85SRichard Henderson { .fni4 = gen_ssra32_i32, 14109a52d85SRichard Henderson .fniv = gen_ssra_vec, 14209a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_s, 14309a52d85SRichard Henderson .load_dest = true, 14409a52d85SRichard Henderson .opt_opc = vecop_list, 14509a52d85SRichard Henderson .vece = MO_32 }, 14609a52d85SRichard Henderson { .fni8 = gen_ssra64_i64, 14709a52d85SRichard Henderson .fniv = gen_ssra_vec, 14809a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_d, 14909a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 15009a52d85SRichard Henderson .opt_opc = vecop_list, 15109a52d85SRichard Henderson .load_dest = true, 15209a52d85SRichard Henderson .vece = MO_64 }, 15309a52d85SRichard Henderson }; 15409a52d85SRichard Henderson 15509a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize]. 
*/ 15609a52d85SRichard Henderson tcg_debug_assert(shift > 0); 15709a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 15809a52d85SRichard Henderson 15909a52d85SRichard Henderson /* 16009a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 16109a52d85SRichard Henderson * Signed results in all sign bits. 16209a52d85SRichard Henderson */ 16309a52d85SRichard Henderson shift = MIN(shift, (8 << vece) - 1); 16409a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 16509a52d85SRichard Henderson } 16609a52d85SRichard Henderson 16709a52d85SRichard Henderson static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 16809a52d85SRichard Henderson { 16909a52d85SRichard Henderson tcg_gen_vec_shr8i_i64(a, a, shift); 17009a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, a); 17109a52d85SRichard Henderson } 17209a52d85SRichard Henderson 17309a52d85SRichard Henderson static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 17409a52d85SRichard Henderson { 17509a52d85SRichard Henderson tcg_gen_vec_shr16i_i64(a, a, shift); 17609a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, a); 17709a52d85SRichard Henderson } 17809a52d85SRichard Henderson 17909a52d85SRichard Henderson static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 18009a52d85SRichard Henderson { 18109a52d85SRichard Henderson tcg_gen_shri_i32(a, a, shift); 18209a52d85SRichard Henderson tcg_gen_add_i32(d, d, a); 18309a52d85SRichard Henderson } 18409a52d85SRichard Henderson 18509a52d85SRichard Henderson static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 18609a52d85SRichard Henderson { 18709a52d85SRichard Henderson tcg_gen_shri_i64(a, a, shift); 18809a52d85SRichard Henderson tcg_gen_add_i64(d, d, a); 18909a52d85SRichard Henderson } 19009a52d85SRichard Henderson 19109a52d85SRichard Henderson static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 19209a52d85SRichard Henderson 
{ 19309a52d85SRichard Henderson tcg_gen_shri_vec(vece, a, a, sh); 19409a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, a); 19509a52d85SRichard Henderson } 19609a52d85SRichard Henderson 19709a52d85SRichard Henderson void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 19809a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 19909a52d85SRichard Henderson { 20009a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 20109a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_add_vec, 0 20209a52d85SRichard Henderson }; 20309a52d85SRichard Henderson static const GVecGen2i ops[4] = { 20409a52d85SRichard Henderson { .fni8 = gen_usra8_i64, 20509a52d85SRichard Henderson .fniv = gen_usra_vec, 20609a52d85SRichard Henderson .fno = gen_helper_gvec_usra_b, 20709a52d85SRichard Henderson .load_dest = true, 20809a52d85SRichard Henderson .opt_opc = vecop_list, 20909a52d85SRichard Henderson .vece = MO_8, }, 21009a52d85SRichard Henderson { .fni8 = gen_usra16_i64, 21109a52d85SRichard Henderson .fniv = gen_usra_vec, 21209a52d85SRichard Henderson .fno = gen_helper_gvec_usra_h, 21309a52d85SRichard Henderson .load_dest = true, 21409a52d85SRichard Henderson .opt_opc = vecop_list, 21509a52d85SRichard Henderson .vece = MO_16, }, 21609a52d85SRichard Henderson { .fni4 = gen_usra32_i32, 21709a52d85SRichard Henderson .fniv = gen_usra_vec, 21809a52d85SRichard Henderson .fno = gen_helper_gvec_usra_s, 21909a52d85SRichard Henderson .load_dest = true, 22009a52d85SRichard Henderson .opt_opc = vecop_list, 22109a52d85SRichard Henderson .vece = MO_32, }, 22209a52d85SRichard Henderson { .fni8 = gen_usra64_i64, 22309a52d85SRichard Henderson .fniv = gen_usra_vec, 22409a52d85SRichard Henderson .fno = gen_helper_gvec_usra_d, 22509a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 22609a52d85SRichard Henderson .load_dest = true, 22709a52d85SRichard Henderson .opt_opc = vecop_list, 22809a52d85SRichard Henderson .vece = MO_64, }, 22909a52d85SRichard 
Henderson }; 23009a52d85SRichard Henderson 23109a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize]. */ 23209a52d85SRichard Henderson tcg_debug_assert(shift > 0); 23309a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 23409a52d85SRichard Henderson 23509a52d85SRichard Henderson /* 23609a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 23709a52d85SRichard Henderson * Unsigned results in all zeros as input to accumulate: nop. 23809a52d85SRichard Henderson */ 23909a52d85SRichard Henderson if (shift < (8 << vece)) { 24009a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 24109a52d85SRichard Henderson } else { 24209a52d85SRichard Henderson /* Nop, but we do need to clear the tail. */ 24309a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 24409a52d85SRichard Henderson } 24509a52d85SRichard Henderson } 24609a52d85SRichard Henderson 24709a52d85SRichard Henderson /* 24809a52d85SRichard Henderson * Shift one less than the requested amount, and the low bit is 24909a52d85SRichard Henderson * the rounding bit. For the 8 and 16-bit operations, because we 25009a52d85SRichard Henderson * mask the low bit, we can perform a normal integer shift instead 25109a52d85SRichard Henderson * of a vector shift. 
25209a52d85SRichard Henderson */ 25309a52d85SRichard Henderson static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 25409a52d85SRichard Henderson { 25509a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 25609a52d85SRichard Henderson 25709a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 25809a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 25909a52d85SRichard Henderson tcg_gen_vec_sar8i_i64(d, a, sh); 26009a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 26109a52d85SRichard Henderson } 26209a52d85SRichard Henderson 26309a52d85SRichard Henderson static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 26409a52d85SRichard Henderson { 26509a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 26609a52d85SRichard Henderson 26709a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 26809a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 26909a52d85SRichard Henderson tcg_gen_vec_sar16i_i64(d, a, sh); 27009a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 27109a52d85SRichard Henderson } 27209a52d85SRichard Henderson 27309a52d85SRichard Henderson void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 27409a52d85SRichard Henderson { 27509a52d85SRichard Henderson TCGv_i32 t; 27609a52d85SRichard Henderson 27709a52d85SRichard Henderson /* Handle shift by the input size for the benefit of trans_SRSHR_ri */ 27809a52d85SRichard Henderson if (sh == 32) { 27909a52d85SRichard Henderson tcg_gen_movi_i32(d, 0); 28009a52d85SRichard Henderson return; 28109a52d85SRichard Henderson } 28209a52d85SRichard Henderson t = tcg_temp_new_i32(); 28309a52d85SRichard Henderson tcg_gen_extract_i32(t, a, sh - 1, 1); 28409a52d85SRichard Henderson tcg_gen_sari_i32(d, a, sh); 28509a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 28609a52d85SRichard Henderson } 28709a52d85SRichard Henderson 28809a52d85SRichard Henderson void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 28909a52d85SRichard Henderson { 
29009a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 29109a52d85SRichard Henderson 29209a52d85SRichard Henderson tcg_gen_extract_i64(t, a, sh - 1, 1); 29309a52d85SRichard Henderson tcg_gen_sari_i64(d, a, sh); 29409a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 29509a52d85SRichard Henderson } 29609a52d85SRichard Henderson 29709a52d85SRichard Henderson static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 29809a52d85SRichard Henderson { 29909a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 300143e179cSRichard Henderson TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1); 30109a52d85SRichard Henderson 30209a52d85SRichard Henderson tcg_gen_shri_vec(vece, t, a, sh - 1); 30309a52d85SRichard Henderson tcg_gen_and_vec(vece, t, t, ones); 30409a52d85SRichard Henderson tcg_gen_sari_vec(vece, d, a, sh); 30509a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 30609a52d85SRichard Henderson } 30709a52d85SRichard Henderson 30809a52d85SRichard Henderson void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 30909a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 31009a52d85SRichard Henderson { 31109a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 31209a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 31309a52d85SRichard Henderson }; 31409a52d85SRichard Henderson static const GVecGen2i ops[4] = { 31509a52d85SRichard Henderson { .fni8 = gen_srshr8_i64, 31609a52d85SRichard Henderson .fniv = gen_srshr_vec, 31709a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_b, 31809a52d85SRichard Henderson .opt_opc = vecop_list, 31909a52d85SRichard Henderson .vece = MO_8 }, 32009a52d85SRichard Henderson { .fni8 = gen_srshr16_i64, 32109a52d85SRichard Henderson .fniv = gen_srshr_vec, 32209a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_h, 32309a52d85SRichard Henderson .opt_opc = vecop_list, 32409a52d85SRichard Henderson .vece = MO_16 }, 
32509a52d85SRichard Henderson { .fni4 = gen_srshr32_i32, 32609a52d85SRichard Henderson .fniv = gen_srshr_vec, 32709a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_s, 32809a52d85SRichard Henderson .opt_opc = vecop_list, 32909a52d85SRichard Henderson .vece = MO_32 }, 33009a52d85SRichard Henderson { .fni8 = gen_srshr64_i64, 33109a52d85SRichard Henderson .fniv = gen_srshr_vec, 33209a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_d, 33309a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 33409a52d85SRichard Henderson .opt_opc = vecop_list, 33509a52d85SRichard Henderson .vece = MO_64 }, 33609a52d85SRichard Henderson }; 33709a52d85SRichard Henderson 33809a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 33909a52d85SRichard Henderson tcg_debug_assert(shift > 0); 34009a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 34109a52d85SRichard Henderson 34209a52d85SRichard Henderson if (shift == (8 << vece)) { 34309a52d85SRichard Henderson /* 34409a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 34509a52d85SRichard Henderson * Signed results in all sign bits. With rounding, this produces 34609a52d85SRichard Henderson * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. 34709a52d85SRichard Henderson * I.e. always zero. 
34809a52d85SRichard Henderson */ 34909a52d85SRichard Henderson tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0); 35009a52d85SRichard Henderson } else { 35109a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 35209a52d85SRichard Henderson } 35309a52d85SRichard Henderson } 35409a52d85SRichard Henderson 35509a52d85SRichard Henderson static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 35609a52d85SRichard Henderson { 35709a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 35809a52d85SRichard Henderson 35909a52d85SRichard Henderson gen_srshr8_i64(t, a, sh); 36009a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 36109a52d85SRichard Henderson } 36209a52d85SRichard Henderson 36309a52d85SRichard Henderson static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 36409a52d85SRichard Henderson { 36509a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 36609a52d85SRichard Henderson 36709a52d85SRichard Henderson gen_srshr16_i64(t, a, sh); 36809a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 36909a52d85SRichard Henderson } 37009a52d85SRichard Henderson 37109a52d85SRichard Henderson static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 37209a52d85SRichard Henderson { 37309a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 37409a52d85SRichard Henderson 37509a52d85SRichard Henderson gen_srshr32_i32(t, a, sh); 37609a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 37709a52d85SRichard Henderson } 37809a52d85SRichard Henderson 37909a52d85SRichard Henderson static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 38009a52d85SRichard Henderson { 38109a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 38209a52d85SRichard Henderson 38309a52d85SRichard Henderson gen_srshr64_i64(t, a, sh); 38409a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 38509a52d85SRichard Henderson } 38609a52d85SRichard Henderson 38709a52d85SRichard Henderson static void gen_srsra_vec(unsigned 
vece, TCGv_vec d, TCGv_vec a, int64_t sh) 38809a52d85SRichard Henderson { 38909a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 39009a52d85SRichard Henderson 39109a52d85SRichard Henderson gen_srshr_vec(vece, t, a, sh); 39209a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 39309a52d85SRichard Henderson } 39409a52d85SRichard Henderson 39509a52d85SRichard Henderson void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 39609a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 39709a52d85SRichard Henderson { 39809a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 39909a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 40009a52d85SRichard Henderson }; 40109a52d85SRichard Henderson static const GVecGen2i ops[4] = { 40209a52d85SRichard Henderson { .fni8 = gen_srsra8_i64, 40309a52d85SRichard Henderson .fniv = gen_srsra_vec, 40409a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_b, 40509a52d85SRichard Henderson .opt_opc = vecop_list, 40609a52d85SRichard Henderson .load_dest = true, 40709a52d85SRichard Henderson .vece = MO_8 }, 40809a52d85SRichard Henderson { .fni8 = gen_srsra16_i64, 40909a52d85SRichard Henderson .fniv = gen_srsra_vec, 41009a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_h, 41109a52d85SRichard Henderson .opt_opc = vecop_list, 41209a52d85SRichard Henderson .load_dest = true, 41309a52d85SRichard Henderson .vece = MO_16 }, 41409a52d85SRichard Henderson { .fni4 = gen_srsra32_i32, 41509a52d85SRichard Henderson .fniv = gen_srsra_vec, 41609a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_s, 41709a52d85SRichard Henderson .opt_opc = vecop_list, 41809a52d85SRichard Henderson .load_dest = true, 41909a52d85SRichard Henderson .vece = MO_32 }, 42009a52d85SRichard Henderson { .fni8 = gen_srsra64_i64, 42109a52d85SRichard Henderson .fniv = gen_srsra_vec, 42209a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_d, 42309a52d85SRichard Henderson 
.prefer_i64 = TCG_TARGET_REG_BITS == 64, 42409a52d85SRichard Henderson .opt_opc = vecop_list, 42509a52d85SRichard Henderson .load_dest = true, 42609a52d85SRichard Henderson .vece = MO_64 }, 42709a52d85SRichard Henderson }; 42809a52d85SRichard Henderson 42909a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 43009a52d85SRichard Henderson tcg_debug_assert(shift > 0); 43109a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 43209a52d85SRichard Henderson 43309a52d85SRichard Henderson /* 43409a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 43509a52d85SRichard Henderson * Signed results in all sign bits. With rounding, this produces 43609a52d85SRichard Henderson * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. 43709a52d85SRichard Henderson * I.e. always zero. With accumulation, this leaves D unchanged. 43809a52d85SRichard Henderson */ 43909a52d85SRichard Henderson if (shift == (8 << vece)) { 44009a52d85SRichard Henderson /* Nop, but we do need to clear the tail. 
*/ 44109a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 44209a52d85SRichard Henderson } else { 44309a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 44409a52d85SRichard Henderson } 44509a52d85SRichard Henderson } 44609a52d85SRichard Henderson 44709a52d85SRichard Henderson static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 44809a52d85SRichard Henderson { 44909a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 45009a52d85SRichard Henderson 45109a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 45209a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 45309a52d85SRichard Henderson tcg_gen_vec_shr8i_i64(d, a, sh); 45409a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 45509a52d85SRichard Henderson } 45609a52d85SRichard Henderson 45709a52d85SRichard Henderson static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 45809a52d85SRichard Henderson { 45909a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 46009a52d85SRichard Henderson 46109a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 46209a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 46309a52d85SRichard Henderson tcg_gen_vec_shr16i_i64(d, a, sh); 46409a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 46509a52d85SRichard Henderson } 46609a52d85SRichard Henderson 46709a52d85SRichard Henderson void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 46809a52d85SRichard Henderson { 46909a52d85SRichard Henderson TCGv_i32 t; 47009a52d85SRichard Henderson 47109a52d85SRichard Henderson /* Handle shift by the input size for the benefit of trans_URSHR_ri */ 47209a52d85SRichard Henderson if (sh == 32) { 47309a52d85SRichard Henderson tcg_gen_extract_i32(d, a, sh - 1, 1); 47409a52d85SRichard Henderson return; 47509a52d85SRichard Henderson } 47609a52d85SRichard Henderson t = tcg_temp_new_i32(); 47709a52d85SRichard Henderson tcg_gen_extract_i32(t, a, sh - 1, 
1); 47809a52d85SRichard Henderson tcg_gen_shri_i32(d, a, sh); 47909a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 48009a52d85SRichard Henderson } 48109a52d85SRichard Henderson 48209a52d85SRichard Henderson void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 48309a52d85SRichard Henderson { 48409a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 48509a52d85SRichard Henderson 48609a52d85SRichard Henderson tcg_gen_extract_i64(t, a, sh - 1, 1); 48709a52d85SRichard Henderson tcg_gen_shri_i64(d, a, sh); 48809a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 48909a52d85SRichard Henderson } 49009a52d85SRichard Henderson 49109a52d85SRichard Henderson static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift) 49209a52d85SRichard Henderson { 49309a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 494143e179cSRichard Henderson TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1); 49509a52d85SRichard Henderson 49609a52d85SRichard Henderson tcg_gen_shri_vec(vece, t, a, shift - 1); 49709a52d85SRichard Henderson tcg_gen_and_vec(vece, t, t, ones); 49809a52d85SRichard Henderson tcg_gen_shri_vec(vece, d, a, shift); 49909a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 50009a52d85SRichard Henderson } 50109a52d85SRichard Henderson 50209a52d85SRichard Henderson void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 50309a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 50409a52d85SRichard Henderson { 50509a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 50609a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_add_vec, 0 50709a52d85SRichard Henderson }; 50809a52d85SRichard Henderson static const GVecGen2i ops[4] = { 50909a52d85SRichard Henderson { .fni8 = gen_urshr8_i64, 51009a52d85SRichard Henderson .fniv = gen_urshr_vec, 51109a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_b, 51209a52d85SRichard Henderson .opt_opc = vecop_list, 51309a52d85SRichard Henderson .vece 
= MO_8 }, 51409a52d85SRichard Henderson { .fni8 = gen_urshr16_i64, 51509a52d85SRichard Henderson .fniv = gen_urshr_vec, 51609a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_h, 51709a52d85SRichard Henderson .opt_opc = vecop_list, 51809a52d85SRichard Henderson .vece = MO_16 }, 51909a52d85SRichard Henderson { .fni4 = gen_urshr32_i32, 52009a52d85SRichard Henderson .fniv = gen_urshr_vec, 52109a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_s, 52209a52d85SRichard Henderson .opt_opc = vecop_list, 52309a52d85SRichard Henderson .vece = MO_32 }, 52409a52d85SRichard Henderson { .fni8 = gen_urshr64_i64, 52509a52d85SRichard Henderson .fniv = gen_urshr_vec, 52609a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_d, 52709a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 52809a52d85SRichard Henderson .opt_opc = vecop_list, 52909a52d85SRichard Henderson .vece = MO_64 }, 53009a52d85SRichard Henderson }; 53109a52d85SRichard Henderson 53209a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 53309a52d85SRichard Henderson tcg_debug_assert(shift > 0); 53409a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 53509a52d85SRichard Henderson 53609a52d85SRichard Henderson if (shift == (8 << vece)) { 53709a52d85SRichard Henderson /* 53809a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 53909a52d85SRichard Henderson * Unsigned results in zero. With rounding, this produces a 54009a52d85SRichard Henderson * copy of the most significant bit. 
         */
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

/*
 * URSRA (unsigned rounding shift right and accumulate), 8-bit lanes
 * packed in an i64: d += round_ushr(a, sh).  When sh equals the lane
 * width, only the rounding bit (the lane's MSB) survives the shift.
 */
static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 8) {
        /* Rounding shift by 8 of an 8-bit lane yields just bit 7. */
        tcg_gen_vec_shr8i_i64(t, a, 7);
    } else {
        gen_urshr8_i64(t, a, sh);
    }
    tcg_gen_vec_add8_i64(d, d, t);
}

/* As gen_ursra8_i64, for 16-bit lanes packed in an i64. */
static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 16) {
        tcg_gen_vec_shr16i_i64(t, a, 15);
    } else {
        gen_urshr16_i64(t, a, sh);
    }
    tcg_gen_vec_add16_i64(d, d, t);
}

/* As gen_ursra8_i64, for a single 32-bit lane. */
static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    if (sh == 32) {
        tcg_gen_shri_i32(t, a, 31);
    } else {
        gen_urshr32_i32(t, a, sh);
    }
    tcg_gen_add_i32(d, d, t);
}

/* As gen_ursra8_i64, for a single 64-bit lane. */
static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 64) {
        tcg_gen_shri_i64(t, a, 63);
    } else {
        gen_urshr64_i64(t, a, sh);
    }
    tcg_gen_add_i64(d, d, t);
}

/* As gen_ursra8_i64, for host vectors of any element size. */
static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    if (sh == (8 << vece)) {
        tcg_gen_shri_vec(vece, t, a, sh - 1);
    } else {
        gen_urshr_vec(vece, t, a, sh);
    }
    tcg_gen_add_vec(vece, d, d, t);
}

/*
 * Expand URSRA over a whole vector register:
 * d[i] += round_ushr(m[i], shift), element size selected by vece.
 */
void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ursra8_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_ursra16_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_ursra32_i32,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_ursra64_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

/*
 * SRI (shift right and insert), 8-bit lanes packed in an i64:
 * insert (a >> shift) into the low bits of each lane of d, leaving
 * the top 'shift' bits of each destination lane untouched.
 */
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    /* Per-lane mask of the bits that receive the shifted-in value. */
    uint64_t mask = dup_const(MO_8, 0xff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
tcg_gen_or_vec(vece, d, d, t); 69209a52d85SRichard Henderson } 69309a52d85SRichard Henderson 69409a52d85SRichard Henderson void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 69509a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 69609a52d85SRichard Henderson { 69709a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 }; 69809a52d85SRichard Henderson const GVecGen2i ops[4] = { 69909a52d85SRichard Henderson { .fni8 = gen_shr8_ins_i64, 70009a52d85SRichard Henderson .fniv = gen_shr_ins_vec, 70109a52d85SRichard Henderson .fno = gen_helper_gvec_sri_b, 70209a52d85SRichard Henderson .load_dest = true, 70309a52d85SRichard Henderson .opt_opc = vecop_list, 70409a52d85SRichard Henderson .vece = MO_8 }, 70509a52d85SRichard Henderson { .fni8 = gen_shr16_ins_i64, 70609a52d85SRichard Henderson .fniv = gen_shr_ins_vec, 70709a52d85SRichard Henderson .fno = gen_helper_gvec_sri_h, 70809a52d85SRichard Henderson .load_dest = true, 70909a52d85SRichard Henderson .opt_opc = vecop_list, 71009a52d85SRichard Henderson .vece = MO_16 }, 71109a52d85SRichard Henderson { .fni4 = gen_shr32_ins_i32, 71209a52d85SRichard Henderson .fniv = gen_shr_ins_vec, 71309a52d85SRichard Henderson .fno = gen_helper_gvec_sri_s, 71409a52d85SRichard Henderson .load_dest = true, 71509a52d85SRichard Henderson .opt_opc = vecop_list, 71609a52d85SRichard Henderson .vece = MO_32 }, 71709a52d85SRichard Henderson { .fni8 = gen_shr64_ins_i64, 71809a52d85SRichard Henderson .fniv = gen_shr_ins_vec, 71909a52d85SRichard Henderson .fno = gen_helper_gvec_sri_d, 72009a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 72109a52d85SRichard Henderson .load_dest = true, 72209a52d85SRichard Henderson .opt_opc = vecop_list, 72309a52d85SRichard Henderson .vece = MO_64 }, 72409a52d85SRichard Henderson }; 72509a52d85SRichard Henderson 72609a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize]. 
*/ 72709a52d85SRichard Henderson tcg_debug_assert(shift > 0); 72809a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 72909a52d85SRichard Henderson 73009a52d85SRichard Henderson /* Shift of esize leaves destination unchanged. */ 73109a52d85SRichard Henderson if (shift < (8 << vece)) { 73209a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 73309a52d85SRichard Henderson } else { 73409a52d85SRichard Henderson /* Nop, but we do need to clear the tail. */ 73509a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 73609a52d85SRichard Henderson } 73709a52d85SRichard Henderson } 73809a52d85SRichard Henderson 73909a52d85SRichard Henderson static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 74009a52d85SRichard Henderson { 74109a52d85SRichard Henderson uint64_t mask = dup_const(MO_8, 0xff << shift); 74209a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 74309a52d85SRichard Henderson 74409a52d85SRichard Henderson tcg_gen_shli_i64(t, a, shift); 74509a52d85SRichard Henderson tcg_gen_andi_i64(t, t, mask); 74609a52d85SRichard Henderson tcg_gen_andi_i64(d, d, ~mask); 74709a52d85SRichard Henderson tcg_gen_or_i64(d, d, t); 74809a52d85SRichard Henderson } 74909a52d85SRichard Henderson 75009a52d85SRichard Henderson static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 75109a52d85SRichard Henderson { 75209a52d85SRichard Henderson uint64_t mask = dup_const(MO_16, 0xffff << shift); 75309a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 75409a52d85SRichard Henderson 75509a52d85SRichard Henderson tcg_gen_shli_i64(t, a, shift); 75609a52d85SRichard Henderson tcg_gen_andi_i64(t, t, mask); 75709a52d85SRichard Henderson tcg_gen_andi_i64(d, d, ~mask); 75809a52d85SRichard Henderson tcg_gen_or_i64(d, d, t); 75909a52d85SRichard Henderson } 76009a52d85SRichard Henderson 76109a52d85SRichard Henderson static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 
76209a52d85SRichard Henderson { 76309a52d85SRichard Henderson tcg_gen_deposit_i32(d, d, a, shift, 32 - shift); 76409a52d85SRichard Henderson } 76509a52d85SRichard Henderson 76609a52d85SRichard Henderson static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 76709a52d85SRichard Henderson { 76809a52d85SRichard Henderson tcg_gen_deposit_i64(d, d, a, shift, 64 - shift); 76909a52d85SRichard Henderson } 77009a52d85SRichard Henderson 77109a52d85SRichard Henderson static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 77209a52d85SRichard Henderson { 77309a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 774143e179cSRichard Henderson TCGv_vec m = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, sh)); 77509a52d85SRichard Henderson 77609a52d85SRichard Henderson tcg_gen_shli_vec(vece, t, a, sh); 77709a52d85SRichard Henderson tcg_gen_and_vec(vece, d, d, m); 77809a52d85SRichard Henderson tcg_gen_or_vec(vece, d, d, t); 77909a52d85SRichard Henderson } 78009a52d85SRichard Henderson 78109a52d85SRichard Henderson void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 78209a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 78309a52d85SRichard Henderson { 78409a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 }; 78509a52d85SRichard Henderson const GVecGen2i ops[4] = { 78609a52d85SRichard Henderson { .fni8 = gen_shl8_ins_i64, 78709a52d85SRichard Henderson .fniv = gen_shl_ins_vec, 78809a52d85SRichard Henderson .fno = gen_helper_gvec_sli_b, 78909a52d85SRichard Henderson .load_dest = true, 79009a52d85SRichard Henderson .opt_opc = vecop_list, 79109a52d85SRichard Henderson .vece = MO_8 }, 79209a52d85SRichard Henderson { .fni8 = gen_shl16_ins_i64, 79309a52d85SRichard Henderson .fniv = gen_shl_ins_vec, 79409a52d85SRichard Henderson .fno = gen_helper_gvec_sli_h, 79509a52d85SRichard Henderson .load_dest = true, 79609a52d85SRichard Henderson .opt_opc = vecop_list, 
79709a52d85SRichard Henderson .vece = MO_16 }, 79809a52d85SRichard Henderson { .fni4 = gen_shl32_ins_i32, 79909a52d85SRichard Henderson .fniv = gen_shl_ins_vec, 80009a52d85SRichard Henderson .fno = gen_helper_gvec_sli_s, 80109a52d85SRichard Henderson .load_dest = true, 80209a52d85SRichard Henderson .opt_opc = vecop_list, 80309a52d85SRichard Henderson .vece = MO_32 }, 80409a52d85SRichard Henderson { .fni8 = gen_shl64_ins_i64, 80509a52d85SRichard Henderson .fniv = gen_shl_ins_vec, 80609a52d85SRichard Henderson .fno = gen_helper_gvec_sli_d, 80709a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 80809a52d85SRichard Henderson .load_dest = true, 80909a52d85SRichard Henderson .opt_opc = vecop_list, 81009a52d85SRichard Henderson .vece = MO_64 }, 81109a52d85SRichard Henderson }; 81209a52d85SRichard Henderson 81309a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [0..esize-1]. */ 81409a52d85SRichard Henderson tcg_debug_assert(shift >= 0); 81509a52d85SRichard Henderson tcg_debug_assert(shift < (8 << vece)); 81609a52d85SRichard Henderson 81709a52d85SRichard Henderson if (shift == 0) { 81809a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz); 81909a52d85SRichard Henderson } else { 82009a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 82109a52d85SRichard Henderson } 82209a52d85SRichard Henderson } 82309a52d85SRichard Henderson 82409a52d85SRichard Henderson static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 82509a52d85SRichard Henderson { 82609a52d85SRichard Henderson gen_helper_neon_mul_u8(a, a, b); 82709a52d85SRichard Henderson gen_helper_neon_add_u8(d, d, a); 82809a52d85SRichard Henderson } 82909a52d85SRichard Henderson 83009a52d85SRichard Henderson static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 83109a52d85SRichard Henderson { 83209a52d85SRichard Henderson gen_helper_neon_mul_u8(a, a, b); 83309a52d85SRichard Henderson 
    gen_helper_neon_sub_u8(d, d, a);
}

/* MLA, 16-bit lanes: d += a * b.  Note that 'a' is clobbered as scratch. */
static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_add_u16(d, d, a);
}

/* MLS, 16-bit lanes: d -= a * b.  Note that 'a' is clobbered as scratch. */
static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_sub_u16(d, d, a);
}

/* MLA, single 32-bit lane: d += a * b, clobbering 'a'. */
static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_add_i32(d, d, a);
}

/* MLS, single 32-bit lane: d -= a * b, clobbering 'a'. */
static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_sub_i32(d, d, a);
}

/* MLA, single 64-bit lane: d += a * b, clobbering 'a'. */
static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_add_i64(d, d, a);
}

/* MLS, single 64-bit lane: d -= a * b, clobbering 'a'. */
static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_sub_i64(d, d, a);
}

/* MLA on host vectors: d += a * b, clobbering 'a'. */
static void gen_mla_vec(unsigned vece, TCGv_vec d,
                        TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_add_vec(vece, d, d, a);
}

/* MLS on host vectors: d -= a * b, clobbering 'a'. */
static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, d, d, a);
}

/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
 * these tables are shared with AArch64 which does support them.
 */
void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mla8_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mla16_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mla32_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mla64_i64,
          .fniv = gen_mla_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* Expand MLS over a whole vector register: d[i] -= n[i] * m[i]. */
void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mls8_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mls16_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mls32_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mls64_i64,
          .fniv = gen_mls_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc =
              vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* CMTST : test is "if (X & Y != 0)". */
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    /* d = (a & b) != 0 ? -1 : 0, via a test-and-set condition. */
    tcg_gen_negsetcond_i32(TCG_COND_TSTNE, d, a, b);
}

/* As gen_cmtst_i32, for a 64-bit lane. */
void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_negsetcond_i64(TCG_COND_TSTNE, d, a, b);
}

/* As gen_cmtst_i32, for host vectors of any element size. */
static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_cmp_vec(TCG_COND_TSTNE, vece, d, a, b);
}

/* Expand CMTST over a whole vector register: d[i] = -((n[i] & m[i]) != 0). */
void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_helper_neon_tst_u8,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_helper_neon_tst_u16,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_cmtst_i32,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_cmtst_i64,
          .fniv = gen_cmtst_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/*
 * USHL, single 32-bit lane: shift src by the signed low byte of
 * 'shift' -- positive shifts left, negative shifts right; any
 * magnitude >= 32 yields zero.
 */
void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(32);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    /* Shift amount is the sign-extended low byte of 'shift'. */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_shr_i32(rval, src, rsh);
    /* dst = (lsh in [0,31]) ? lval : 0; negative lsh is huge as unsigned. */
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
    /* dst = (rsh in [0,31]) ? rval : dst, i.e. right shift wins if valid. */
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

/* As gen_ushl_i32, for a single 64-bit lane. */
void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(64);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_shr_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

/* As gen_ushl_i32, for host vectors of any element size. */
static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec max, zero;

    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        /* Isolate the low byte of the per-element shift count. */
        TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_shrv_vec(vece, rval, src, rsh);

    /*
     * The choice of GE (signed) and GEU (unsigned) are biased toward
     * the instructions of the x86_64 host.  For MO_8, the whole byte
     * is significant so we must use an unsigned compare; otherwise we
     * have already masked to a byte and so a signed compare works.
     * Other tcg hosts have a full set of comparisons and do not care.
     */
    zero = tcg_constant_vec_matching(dst, vece, 0);
    max = tcg_constant_vec_matching(dst, vece, 8 << vece);
    if (vece == MO_8) {
        tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, lval, lsh, max, zero, lval);
        tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, rval, rsh, max, zero, rval);
    } else {
        tcg_gen_cmpsel_vec(TCG_COND_GE, vece, lval, lsh, max, zero, lval);
        tcg_gen_cmpsel_vec(TCG_COND_GE, vece, rval, rsh, max, zero, rval);
    }
    /* Exactly one of lval/rval is live; merge them into dst. */
    tcg_gen_or_vec(vece, dst, lval, rval);
}

/* Expand USHL (variable unsigned shift) over a whole vector register. */
void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_shlv_vec,
        INDEX_op_shrv_vec, INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ushl_i32,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ushl_i64,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/*
 * SSHL, single 32-bit lane: shift src by the signed low byte of
 * 'shift' -- positive shifts left (overflow to zero), negative
 * arithmetic-shifts right (large magnitudes saturate at 31, giving
 * a replicated sign bit).
 */
void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(31);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    /* Clamp the right-shift count so an out-of-range shift yields -1/0. */
    tcg_gen_umin_i32(rsh, rsh, max);
    tcg_gen_sar_i32(rval, src, rsh);
    /* lval = (lsh in [0,31]) ? lval : 0. */
    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
    /* dst = (lsh < 0) ? rval : lval. */
    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

/* As gen_sshl_i32, for a single 64-bit lane. */
void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(63);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_umin_i64(rsh, rsh, max);
    tcg_gen_sar_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

/* As gen_sshl_i32, for host vectors of any element size. */
static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec max, zero;

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        /* Isolate the low byte of the per-element shift count. */
        TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /* Bound rsh so out of bound right shift gets -1. */
    max = tcg_constant_vec_matching(dst, vece, (8 << vece) - 1);
    tcg_gen_umin_vec(vece, rsh, rsh, max);

    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_sarv_vec(vece, rval, src, rsh);

    /* Select in-bound left shift. */
    zero = tcg_constant_vec_matching(dst, vece, 0);
    tcg_gen_cmpsel_vec(TCG_COND_GT, vece, lval, lsh, max, zero, lval);

    /* Select between left and right shift.
*/ 119109a52d85SRichard Henderson if (vece == MO_8) { 1192143e179cSRichard Henderson tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, zero, rval, lval); 119309a52d85SRichard Henderson } else { 1194143e179cSRichard Henderson TCGv_vec sgn = tcg_constant_vec_matching(dst, vece, 0x80); 1195143e179cSRichard Henderson tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, sgn, lval, rval); 119609a52d85SRichard Henderson } 119709a52d85SRichard Henderson } 119809a52d85SRichard Henderson 119909a52d85SRichard Henderson void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 120009a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 120109a52d85SRichard Henderson { 120209a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 120309a52d85SRichard Henderson INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, 1204*ee36a772SRichard Henderson INDEX_op_sarv_vec, INDEX_op_cmpsel_vec, 0 120509a52d85SRichard Henderson }; 120609a52d85SRichard Henderson static const GVecGen3 ops[4] = { 120709a52d85SRichard Henderson { .fniv = gen_sshl_vec, 120809a52d85SRichard Henderson .fno = gen_helper_gvec_sshl_b, 120909a52d85SRichard Henderson .opt_opc = vecop_list, 121009a52d85SRichard Henderson .vece = MO_8 }, 121109a52d85SRichard Henderson { .fniv = gen_sshl_vec, 121209a52d85SRichard Henderson .fno = gen_helper_gvec_sshl_h, 121309a52d85SRichard Henderson .opt_opc = vecop_list, 121409a52d85SRichard Henderson .vece = MO_16 }, 121509a52d85SRichard Henderson { .fni4 = gen_sshl_i32, 121609a52d85SRichard Henderson .fniv = gen_sshl_vec, 121709a52d85SRichard Henderson .opt_opc = vecop_list, 121809a52d85SRichard Henderson .vece = MO_32 }, 121909a52d85SRichard Henderson { .fni8 = gen_sshl_i64, 122009a52d85SRichard Henderson .fniv = gen_sshl_vec, 122109a52d85SRichard Henderson .opt_opc = vecop_list, 122209a52d85SRichard Henderson .vece = MO_64 }, 122309a52d85SRichard Henderson }; 122409a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 
&ops[vece]); 122509a52d85SRichard Henderson } 122609a52d85SRichard Henderson 1227940392c8SRichard Henderson void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1228940392c8SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1229940392c8SRichard Henderson { 1230940392c8SRichard Henderson static gen_helper_gvec_3 * const fns[] = { 1231940392c8SRichard Henderson gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h, 1232940392c8SRichard Henderson gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d, 1233940392c8SRichard Henderson }; 1234940392c8SRichard Henderson tcg_debug_assert(vece <= MO_64); 1235940392c8SRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1236940392c8SRichard Henderson } 1237940392c8SRichard Henderson 1238940392c8SRichard Henderson void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1239940392c8SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1240940392c8SRichard Henderson { 1241940392c8SRichard Henderson static gen_helper_gvec_3 * const fns[] = { 1242940392c8SRichard Henderson gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h, 1243940392c8SRichard Henderson gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d, 1244940392c8SRichard Henderson }; 1245940392c8SRichard Henderson tcg_debug_assert(vece <= MO_64); 1246940392c8SRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1247940392c8SRichard Henderson } 1248940392c8SRichard Henderson 1249e72a6878SRichard Henderson void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1250e72a6878SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1251e72a6878SRichard Henderson { 1252e72a6878SRichard Henderson static gen_helper_gvec_3_ptr * const fns[] = { 1253e72a6878SRichard Henderson gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h, 1254e72a6878SRichard Henderson gen_helper_neon_sqshl_s, gen_helper_neon_sqshl_d, 1255e72a6878SRichard Henderson 
}; 1256e72a6878SRichard Henderson tcg_debug_assert(vece <= MO_64); 1257e72a6878SRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, 1258e72a6878SRichard Henderson opr_sz, max_sz, 0, fns[vece]); 1259e72a6878SRichard Henderson } 1260e72a6878SRichard Henderson 1261e72a6878SRichard Henderson void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1262e72a6878SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1263e72a6878SRichard Henderson { 1264e72a6878SRichard Henderson static gen_helper_gvec_3_ptr * const fns[] = { 1265e72a6878SRichard Henderson gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h, 1266e72a6878SRichard Henderson gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d, 1267e72a6878SRichard Henderson }; 1268e72a6878SRichard Henderson tcg_debug_assert(vece <= MO_64); 1269e72a6878SRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, 1270e72a6878SRichard Henderson opr_sz, max_sz, 0, fns[vece]); 1271e72a6878SRichard Henderson } 1272e72a6878SRichard Henderson 1273cef9d54fSRichard Henderson void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1274cef9d54fSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1275cef9d54fSRichard Henderson { 1276cef9d54fSRichard Henderson static gen_helper_gvec_3_ptr * const fns[] = { 1277cef9d54fSRichard Henderson gen_helper_neon_sqrshl_b, gen_helper_neon_sqrshl_h, 1278cef9d54fSRichard Henderson gen_helper_neon_sqrshl_s, gen_helper_neon_sqrshl_d, 1279cef9d54fSRichard Henderson }; 1280cef9d54fSRichard Henderson tcg_debug_assert(vece <= MO_64); 1281cef9d54fSRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, 1282cef9d54fSRichard Henderson opr_sz, max_sz, 0, fns[vece]); 1283cef9d54fSRichard Henderson } 1284cef9d54fSRichard Henderson 1285cef9d54fSRichard Henderson void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1286cef9d54fSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 
1287cef9d54fSRichard Henderson { 1288cef9d54fSRichard Henderson static gen_helper_gvec_3_ptr * const fns[] = { 1289cef9d54fSRichard Henderson gen_helper_neon_uqrshl_b, gen_helper_neon_uqrshl_h, 1290cef9d54fSRichard Henderson gen_helper_neon_uqrshl_s, gen_helper_neon_uqrshl_d, 1291cef9d54fSRichard Henderson }; 1292cef9d54fSRichard Henderson tcg_debug_assert(vece <= MO_64); 1293cef9d54fSRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, 1294cef9d54fSRichard Henderson opr_sz, max_sz, 0, fns[vece]); 1295cef9d54fSRichard Henderson } 1296cef9d54fSRichard Henderson 1297f4fa83d6SRichard Henderson void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1298f4fa83d6SRichard Henderson { 1299f4fa83d6SRichard Henderson uint64_t max = MAKE_64BIT_MASK(0, 8 << esz); 1300f4fa83d6SRichard Henderson TCGv_i64 tmp = tcg_temp_new_i64(); 1301f4fa83d6SRichard Henderson 1302f4fa83d6SRichard Henderson tcg_gen_add_i64(tmp, a, b); 1303f4fa83d6SRichard Henderson tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max)); 1304f4fa83d6SRichard Henderson tcg_gen_xor_i64(tmp, tmp, res); 1305f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, tmp); 1306f4fa83d6SRichard Henderson } 1307f4fa83d6SRichard Henderson 1308f4fa83d6SRichard Henderson void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1309f4fa83d6SRichard Henderson { 1310f4fa83d6SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 1311f4fa83d6SRichard Henderson 1312f4fa83d6SRichard Henderson tcg_gen_add_i64(t, a, b); 1313f4fa83d6SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a, 1314f4fa83d6SRichard Henderson tcg_constant_i64(UINT64_MAX), t); 1315f4fa83d6SRichard Henderson tcg_gen_xor_i64(t, t, res); 1316f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, t); 1317f4fa83d6SRichard Henderson } 1318f4fa83d6SRichard Henderson 131976f4a8aeSRichard Henderson static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 132009a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 
132109a52d85SRichard Henderson { 132209a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 132309a52d85SRichard Henderson tcg_gen_add_vec(vece, x, a, b); 132409a52d85SRichard Henderson tcg_gen_usadd_vec(vece, t, a, b); 132576f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 132676f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 132709a52d85SRichard Henderson } 132809a52d85SRichard Henderson 132909a52d85SRichard Henderson void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 133009a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 133109a52d85SRichard Henderson { 133209a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 133376f4a8aeSRichard Henderson INDEX_op_usadd_vec, INDEX_op_add_vec, 0 133409a52d85SRichard Henderson }; 133509a52d85SRichard Henderson static const GVecGen4 ops[4] = { 133609a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 133709a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_b, 133809a52d85SRichard Henderson .write_aofs = true, 133909a52d85SRichard Henderson .opt_opc = vecop_list, 134009a52d85SRichard Henderson .vece = MO_8 }, 134109a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 134209a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_h, 134309a52d85SRichard Henderson .write_aofs = true, 134409a52d85SRichard Henderson .opt_opc = vecop_list, 134509a52d85SRichard Henderson .vece = MO_16 }, 134609a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 134709a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_s, 134809a52d85SRichard Henderson .write_aofs = true, 134909a52d85SRichard Henderson .opt_opc = vecop_list, 135009a52d85SRichard Henderson .vece = MO_32 }, 135109a52d85SRichard Henderson { .fniv = gen_uqadd_vec, 1352f4fa83d6SRichard Henderson .fni8 = gen_uqadd_d, 135309a52d85SRichard Henderson .fno = gen_helper_gvec_uqadd_d, 135409a52d85SRichard Henderson .write_aofs = true, 135509a52d85SRichard Henderson .opt_opc = vecop_list, 135609a52d85SRichard 
Henderson .vece = MO_64 }, 135709a52d85SRichard Henderson }; 135801d5665bSRichard Henderson 135901d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 136009a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 136109a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 136209a52d85SRichard Henderson } 136309a52d85SRichard Henderson 1364f4fa83d6SRichard Henderson void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1365f4fa83d6SRichard Henderson { 1366f4fa83d6SRichard Henderson int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1); 1367f4fa83d6SRichard Henderson int64_t min = -1ll - max; 1368f4fa83d6SRichard Henderson TCGv_i64 tmp = tcg_temp_new_i64(); 1369f4fa83d6SRichard Henderson 1370f4fa83d6SRichard Henderson tcg_gen_add_i64(tmp, a, b); 1371f4fa83d6SRichard Henderson tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max)); 1372f4fa83d6SRichard Henderson tcg_gen_smax_i64(res, res, tcg_constant_i64(min)); 1373f4fa83d6SRichard Henderson tcg_gen_xor_i64(tmp, tmp, res); 1374f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, tmp); 1375f4fa83d6SRichard Henderson } 1376f4fa83d6SRichard Henderson 1377f4fa83d6SRichard Henderson void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1378f4fa83d6SRichard Henderson { 1379f4fa83d6SRichard Henderson TCGv_i64 t0 = tcg_temp_new_i64(); 1380f4fa83d6SRichard Henderson TCGv_i64 t1 = tcg_temp_new_i64(); 1381f4fa83d6SRichard Henderson TCGv_i64 t2 = tcg_temp_new_i64(); 1382f4fa83d6SRichard Henderson 1383f4fa83d6SRichard Henderson tcg_gen_add_i64(t0, a, b); 1384f4fa83d6SRichard Henderson 1385f4fa83d6SRichard Henderson /* Compute signed overflow indication into T1 */ 1386f4fa83d6SRichard Henderson tcg_gen_xor_i64(t1, a, b); 1387f4fa83d6SRichard Henderson tcg_gen_xor_i64(t2, t0, a); 1388f4fa83d6SRichard Henderson tcg_gen_andc_i64(t1, t2, t1); 1389f4fa83d6SRichard Henderson 1390f4fa83d6SRichard Henderson /* Compute saturated value into 
T2 */ 1391f4fa83d6SRichard Henderson tcg_gen_sari_i64(t2, a, 63); 1392f4fa83d6SRichard Henderson tcg_gen_xori_i64(t2, t2, INT64_MAX); 1393f4fa83d6SRichard Henderson 1394f4fa83d6SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0); 1395f4fa83d6SRichard Henderson tcg_gen_xor_i64(t0, t0, res); 1396f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, t0); 1397f4fa83d6SRichard Henderson } 1398f4fa83d6SRichard Henderson 139976f4a8aeSRichard Henderson static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 140009a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 140109a52d85SRichard Henderson { 140209a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 140309a52d85SRichard Henderson tcg_gen_add_vec(vece, x, a, b); 140409a52d85SRichard Henderson tcg_gen_ssadd_vec(vece, t, a, b); 140576f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 140676f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 140709a52d85SRichard Henderson } 140809a52d85SRichard Henderson 140909a52d85SRichard Henderson void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 141009a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 141109a52d85SRichard Henderson { 141209a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 141376f4a8aeSRichard Henderson INDEX_op_ssadd_vec, INDEX_op_add_vec, 0 141409a52d85SRichard Henderson }; 141509a52d85SRichard Henderson static const GVecGen4 ops[4] = { 141609a52d85SRichard Henderson { .fniv = gen_sqadd_vec, 141709a52d85SRichard Henderson .fno = gen_helper_gvec_sqadd_b, 141809a52d85SRichard Henderson .opt_opc = vecop_list, 141909a52d85SRichard Henderson .write_aofs = true, 142009a52d85SRichard Henderson .vece = MO_8 }, 142109a52d85SRichard Henderson { .fniv = gen_sqadd_vec, 142209a52d85SRichard Henderson .fno = gen_helper_gvec_sqadd_h, 142309a52d85SRichard Henderson .opt_opc = vecop_list, 142409a52d85SRichard Henderson .write_aofs = true, 
142509a52d85SRichard Henderson .vece = MO_16 }, 142609a52d85SRichard Henderson { .fniv = gen_sqadd_vec, 142709a52d85SRichard Henderson .fno = gen_helper_gvec_sqadd_s, 142809a52d85SRichard Henderson .opt_opc = vecop_list, 142909a52d85SRichard Henderson .write_aofs = true, 143009a52d85SRichard Henderson .vece = MO_32 }, 143109a52d85SRichard Henderson { .fniv = gen_sqadd_vec, 1432f4fa83d6SRichard Henderson .fni8 = gen_sqadd_d, 143309a52d85SRichard Henderson .fno = gen_helper_gvec_sqadd_d, 143409a52d85SRichard Henderson .opt_opc = vecop_list, 143509a52d85SRichard Henderson .write_aofs = true, 143609a52d85SRichard Henderson .vece = MO_64 }, 143709a52d85SRichard Henderson }; 143801d5665bSRichard Henderson 143901d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 144009a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 144109a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 144209a52d85SRichard Henderson } 144309a52d85SRichard Henderson 1444f4fa83d6SRichard Henderson void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1445f4fa83d6SRichard Henderson { 1446f4fa83d6SRichard Henderson TCGv_i64 tmp = tcg_temp_new_i64(); 1447f4fa83d6SRichard Henderson 1448f4fa83d6SRichard Henderson tcg_gen_sub_i64(tmp, a, b); 1449f4fa83d6SRichard Henderson tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0)); 1450f4fa83d6SRichard Henderson tcg_gen_xor_i64(tmp, tmp, res); 1451f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, tmp); 1452f4fa83d6SRichard Henderson } 1453f4fa83d6SRichard Henderson 1454f4fa83d6SRichard Henderson void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1455f4fa83d6SRichard Henderson { 1456f4fa83d6SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 1457f4fa83d6SRichard Henderson 1458f4fa83d6SRichard Henderson tcg_gen_sub_i64(t, a, b); 1459f4fa83d6SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), t); 
1460f4fa83d6SRichard Henderson tcg_gen_xor_i64(t, t, res); 1461f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, t); 1462f4fa83d6SRichard Henderson } 1463f4fa83d6SRichard Henderson 146476f4a8aeSRichard Henderson static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 146509a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 146609a52d85SRichard Henderson { 146709a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 146809a52d85SRichard Henderson tcg_gen_sub_vec(vece, x, a, b); 146909a52d85SRichard Henderson tcg_gen_ussub_vec(vece, t, a, b); 147076f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 147176f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 147209a52d85SRichard Henderson } 147309a52d85SRichard Henderson 147409a52d85SRichard Henderson void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 147509a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 147609a52d85SRichard Henderson { 147709a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 147876f4a8aeSRichard Henderson INDEX_op_ussub_vec, INDEX_op_sub_vec, 0 147909a52d85SRichard Henderson }; 148009a52d85SRichard Henderson static const GVecGen4 ops[4] = { 148109a52d85SRichard Henderson { .fniv = gen_uqsub_vec, 148209a52d85SRichard Henderson .fno = gen_helper_gvec_uqsub_b, 148309a52d85SRichard Henderson .opt_opc = vecop_list, 148409a52d85SRichard Henderson .write_aofs = true, 148509a52d85SRichard Henderson .vece = MO_8 }, 148609a52d85SRichard Henderson { .fniv = gen_uqsub_vec, 148709a52d85SRichard Henderson .fno = gen_helper_gvec_uqsub_h, 148809a52d85SRichard Henderson .opt_opc = vecop_list, 148909a52d85SRichard Henderson .write_aofs = true, 149009a52d85SRichard Henderson .vece = MO_16 }, 149109a52d85SRichard Henderson { .fniv = gen_uqsub_vec, 149209a52d85SRichard Henderson .fno = gen_helper_gvec_uqsub_s, 149309a52d85SRichard Henderson .opt_opc = vecop_list, 149409a52d85SRichard Henderson .write_aofs = true, 
149509a52d85SRichard Henderson .vece = MO_32 }, 149609a52d85SRichard Henderson { .fniv = gen_uqsub_vec, 1497f4fa83d6SRichard Henderson .fni8 = gen_uqsub_d, 149809a52d85SRichard Henderson .fno = gen_helper_gvec_uqsub_d, 149909a52d85SRichard Henderson .opt_opc = vecop_list, 150009a52d85SRichard Henderson .write_aofs = true, 150109a52d85SRichard Henderson .vece = MO_64 }, 150209a52d85SRichard Henderson }; 150301d5665bSRichard Henderson 150401d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 150509a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 150609a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 150709a52d85SRichard Henderson } 150809a52d85SRichard Henderson 1509f4fa83d6SRichard Henderson void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) 1510f4fa83d6SRichard Henderson { 1511f4fa83d6SRichard Henderson int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1); 1512f4fa83d6SRichard Henderson int64_t min = -1ll - max; 1513f4fa83d6SRichard Henderson TCGv_i64 tmp = tcg_temp_new_i64(); 1514f4fa83d6SRichard Henderson 1515f4fa83d6SRichard Henderson tcg_gen_sub_i64(tmp, a, b); 1516f4fa83d6SRichard Henderson tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max)); 1517f4fa83d6SRichard Henderson tcg_gen_smax_i64(res, res, tcg_constant_i64(min)); 1518f4fa83d6SRichard Henderson tcg_gen_xor_i64(tmp, tmp, res); 1519f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, tmp); 1520f4fa83d6SRichard Henderson } 1521f4fa83d6SRichard Henderson 1522f4fa83d6SRichard Henderson void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) 1523f4fa83d6SRichard Henderson { 1524f4fa83d6SRichard Henderson TCGv_i64 t0 = tcg_temp_new_i64(); 1525f4fa83d6SRichard Henderson TCGv_i64 t1 = tcg_temp_new_i64(); 1526f4fa83d6SRichard Henderson TCGv_i64 t2 = tcg_temp_new_i64(); 1527f4fa83d6SRichard Henderson 1528f4fa83d6SRichard Henderson tcg_gen_sub_i64(t0, a, b); 1529f4fa83d6SRichard Henderson 
1530f4fa83d6SRichard Henderson /* Compute signed overflow indication into T1 */ 1531f4fa83d6SRichard Henderson tcg_gen_xor_i64(t1, a, b); 1532f4fa83d6SRichard Henderson tcg_gen_xor_i64(t2, t0, a); 1533f4fa83d6SRichard Henderson tcg_gen_and_i64(t1, t1, t2); 1534f4fa83d6SRichard Henderson 1535f4fa83d6SRichard Henderson /* Compute saturated value into T2 */ 1536f4fa83d6SRichard Henderson tcg_gen_sari_i64(t2, a, 63); 1537f4fa83d6SRichard Henderson tcg_gen_xori_i64(t2, t2, INT64_MAX); 1538f4fa83d6SRichard Henderson 1539f4fa83d6SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0); 1540f4fa83d6SRichard Henderson tcg_gen_xor_i64(t0, t0, res); 1541f4fa83d6SRichard Henderson tcg_gen_or_i64(qc, qc, t0); 1542f4fa83d6SRichard Henderson } 1543f4fa83d6SRichard Henderson 154476f4a8aeSRichard Henderson static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, 154509a52d85SRichard Henderson TCGv_vec a, TCGv_vec b) 154609a52d85SRichard Henderson { 154709a52d85SRichard Henderson TCGv_vec x = tcg_temp_new_vec_matching(t); 154809a52d85SRichard Henderson tcg_gen_sub_vec(vece, x, a, b); 154909a52d85SRichard Henderson tcg_gen_sssub_vec(vece, t, a, b); 155076f4a8aeSRichard Henderson tcg_gen_xor_vec(vece, x, x, t); 155176f4a8aeSRichard Henderson tcg_gen_or_vec(vece, qc, qc, x); 155209a52d85SRichard Henderson } 155309a52d85SRichard Henderson 155409a52d85SRichard Henderson void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 155509a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 155609a52d85SRichard Henderson { 155709a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 155876f4a8aeSRichard Henderson INDEX_op_sssub_vec, INDEX_op_sub_vec, 0 155909a52d85SRichard Henderson }; 156009a52d85SRichard Henderson static const GVecGen4 ops[4] = { 156109a52d85SRichard Henderson { .fniv = gen_sqsub_vec, 156209a52d85SRichard Henderson .fno = gen_helper_gvec_sqsub_b, 156309a52d85SRichard Henderson .opt_opc = 
vecop_list, 156409a52d85SRichard Henderson .write_aofs = true, 156509a52d85SRichard Henderson .vece = MO_8 }, 156609a52d85SRichard Henderson { .fniv = gen_sqsub_vec, 156709a52d85SRichard Henderson .fno = gen_helper_gvec_sqsub_h, 156809a52d85SRichard Henderson .opt_opc = vecop_list, 156909a52d85SRichard Henderson .write_aofs = true, 157009a52d85SRichard Henderson .vece = MO_16 }, 157109a52d85SRichard Henderson { .fniv = gen_sqsub_vec, 157209a52d85SRichard Henderson .fno = gen_helper_gvec_sqsub_s, 157309a52d85SRichard Henderson .opt_opc = vecop_list, 157409a52d85SRichard Henderson .write_aofs = true, 157509a52d85SRichard Henderson .vece = MO_32 }, 157609a52d85SRichard Henderson { .fniv = gen_sqsub_vec, 1577f4fa83d6SRichard Henderson .fni8 = gen_sqsub_d, 157809a52d85SRichard Henderson .fno = gen_helper_gvec_sqsub_d, 157909a52d85SRichard Henderson .opt_opc = vecop_list, 158009a52d85SRichard Henderson .write_aofs = true, 158109a52d85SRichard Henderson .vece = MO_64 }, 158209a52d85SRichard Henderson }; 158301d5665bSRichard Henderson 158401d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 158509a52d85SRichard Henderson tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), 158609a52d85SRichard Henderson rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 158709a52d85SRichard Henderson } 158809a52d85SRichard Henderson 158909a52d85SRichard Henderson static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 159009a52d85SRichard Henderson { 159109a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 159209a52d85SRichard Henderson 159309a52d85SRichard Henderson tcg_gen_sub_i32(t, a, b); 159409a52d85SRichard Henderson tcg_gen_sub_i32(d, b, a); 159509a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t); 159609a52d85SRichard Henderson } 159709a52d85SRichard Henderson 159809a52d85SRichard Henderson static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 159909a52d85SRichard Henderson { 160009a52d85SRichard Henderson 
TCGv_i64 t = tcg_temp_new_i64(); 160109a52d85SRichard Henderson 160209a52d85SRichard Henderson tcg_gen_sub_i64(t, a, b); 160309a52d85SRichard Henderson tcg_gen_sub_i64(d, b, a); 160409a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t); 160509a52d85SRichard Henderson } 160609a52d85SRichard Henderson 160709a52d85SRichard Henderson static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 160809a52d85SRichard Henderson { 160909a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 161009a52d85SRichard Henderson 161109a52d85SRichard Henderson tcg_gen_smin_vec(vece, t, a, b); 161209a52d85SRichard Henderson tcg_gen_smax_vec(vece, d, a, b); 161309a52d85SRichard Henderson tcg_gen_sub_vec(vece, d, d, t); 161409a52d85SRichard Henderson } 161509a52d85SRichard Henderson 161609a52d85SRichard Henderson void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 161709a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 161809a52d85SRichard Henderson { 161909a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 162009a52d85SRichard Henderson INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 162109a52d85SRichard Henderson }; 162209a52d85SRichard Henderson static const GVecGen3 ops[4] = { 162309a52d85SRichard Henderson { .fniv = gen_sabd_vec, 162409a52d85SRichard Henderson .fno = gen_helper_gvec_sabd_b, 162509a52d85SRichard Henderson .opt_opc = vecop_list, 162609a52d85SRichard Henderson .vece = MO_8 }, 162709a52d85SRichard Henderson { .fniv = gen_sabd_vec, 162809a52d85SRichard Henderson .fno = gen_helper_gvec_sabd_h, 162909a52d85SRichard Henderson .opt_opc = vecop_list, 163009a52d85SRichard Henderson .vece = MO_16 }, 163109a52d85SRichard Henderson { .fni4 = gen_sabd_i32, 163209a52d85SRichard Henderson .fniv = gen_sabd_vec, 163309a52d85SRichard Henderson .fno = gen_helper_gvec_sabd_s, 163409a52d85SRichard Henderson .opt_opc = vecop_list, 163509a52d85SRichard Henderson .vece = MO_32 
}, 163609a52d85SRichard Henderson { .fni8 = gen_sabd_i64, 163709a52d85SRichard Henderson .fniv = gen_sabd_vec, 163809a52d85SRichard Henderson .fno = gen_helper_gvec_sabd_d, 163909a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 164009a52d85SRichard Henderson .opt_opc = vecop_list, 164109a52d85SRichard Henderson .vece = MO_64 }, 164209a52d85SRichard Henderson }; 164309a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 164409a52d85SRichard Henderson } 164509a52d85SRichard Henderson 164609a52d85SRichard Henderson static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 164709a52d85SRichard Henderson { 164809a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 164909a52d85SRichard Henderson 165009a52d85SRichard Henderson tcg_gen_sub_i32(t, a, b); 165109a52d85SRichard Henderson tcg_gen_sub_i32(d, b, a); 165209a52d85SRichard Henderson tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t); 165309a52d85SRichard Henderson } 165409a52d85SRichard Henderson 165509a52d85SRichard Henderson static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 165609a52d85SRichard Henderson { 165709a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 165809a52d85SRichard Henderson 165909a52d85SRichard Henderson tcg_gen_sub_i64(t, a, b); 166009a52d85SRichard Henderson tcg_gen_sub_i64(d, b, a); 166109a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t); 166209a52d85SRichard Henderson } 166309a52d85SRichard Henderson 166409a52d85SRichard Henderson static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 166509a52d85SRichard Henderson { 166609a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 166709a52d85SRichard Henderson 166809a52d85SRichard Henderson tcg_gen_umin_vec(vece, t, a, b); 166909a52d85SRichard Henderson tcg_gen_umax_vec(vece, d, a, b); 167009a52d85SRichard Henderson tcg_gen_sub_vec(vece, d, d, t); 167109a52d85SRichard Henderson } 167209a52d85SRichard 
Henderson 167309a52d85SRichard Henderson void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 167409a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 167509a52d85SRichard Henderson { 167609a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 167709a52d85SRichard Henderson INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0 167809a52d85SRichard Henderson }; 167909a52d85SRichard Henderson static const GVecGen3 ops[4] = { 168009a52d85SRichard Henderson { .fniv = gen_uabd_vec, 168109a52d85SRichard Henderson .fno = gen_helper_gvec_uabd_b, 168209a52d85SRichard Henderson .opt_opc = vecop_list, 168309a52d85SRichard Henderson .vece = MO_8 }, 168409a52d85SRichard Henderson { .fniv = gen_uabd_vec, 168509a52d85SRichard Henderson .fno = gen_helper_gvec_uabd_h, 168609a52d85SRichard Henderson .opt_opc = vecop_list, 168709a52d85SRichard Henderson .vece = MO_16 }, 168809a52d85SRichard Henderson { .fni4 = gen_uabd_i32, 168909a52d85SRichard Henderson .fniv = gen_uabd_vec, 169009a52d85SRichard Henderson .fno = gen_helper_gvec_uabd_s, 169109a52d85SRichard Henderson .opt_opc = vecop_list, 169209a52d85SRichard Henderson .vece = MO_32 }, 169309a52d85SRichard Henderson { .fni8 = gen_uabd_i64, 169409a52d85SRichard Henderson .fniv = gen_uabd_vec, 169509a52d85SRichard Henderson .fno = gen_helper_gvec_uabd_d, 169609a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 169709a52d85SRichard Henderson .opt_opc = vecop_list, 169809a52d85SRichard Henderson .vece = MO_64 }, 169909a52d85SRichard Henderson }; 170009a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 170109a52d85SRichard Henderson } 170209a52d85SRichard Henderson 170309a52d85SRichard Henderson static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 170409a52d85SRichard Henderson { 170509a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 170609a52d85SRichard Henderson gen_sabd_i32(t, a, b); 170709a52d85SRichard 
Henderson tcg_gen_add_i32(d, d, t); 170809a52d85SRichard Henderson } 170909a52d85SRichard Henderson 171009a52d85SRichard Henderson static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 171109a52d85SRichard Henderson { 171209a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 171309a52d85SRichard Henderson gen_sabd_i64(t, a, b); 171409a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 171509a52d85SRichard Henderson } 171609a52d85SRichard Henderson 171709a52d85SRichard Henderson static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 171809a52d85SRichard Henderson { 171909a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 172009a52d85SRichard Henderson gen_sabd_vec(vece, t, a, b); 172109a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 172209a52d85SRichard Henderson } 172309a52d85SRichard Henderson 172409a52d85SRichard Henderson void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 172509a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 172609a52d85SRichard Henderson { 172709a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 172809a52d85SRichard Henderson INDEX_op_sub_vec, INDEX_op_add_vec, 172909a52d85SRichard Henderson INDEX_op_smin_vec, INDEX_op_smax_vec, 0 173009a52d85SRichard Henderson }; 173109a52d85SRichard Henderson static const GVecGen3 ops[4] = { 173209a52d85SRichard Henderson { .fniv = gen_saba_vec, 173309a52d85SRichard Henderson .fno = gen_helper_gvec_saba_b, 173409a52d85SRichard Henderson .opt_opc = vecop_list, 173509a52d85SRichard Henderson .load_dest = true, 173609a52d85SRichard Henderson .vece = MO_8 }, 173709a52d85SRichard Henderson { .fniv = gen_saba_vec, 173809a52d85SRichard Henderson .fno = gen_helper_gvec_saba_h, 173909a52d85SRichard Henderson .opt_opc = vecop_list, 174009a52d85SRichard Henderson .load_dest = true, 174109a52d85SRichard Henderson .vece = MO_16 }, 174209a52d85SRichard Henderson { .fni4 = gen_saba_i32, 174309a52d85SRichard 
Henderson .fniv = gen_saba_vec, 174409a52d85SRichard Henderson .fno = gen_helper_gvec_saba_s, 174509a52d85SRichard Henderson .opt_opc = vecop_list, 174609a52d85SRichard Henderson .load_dest = true, 174709a52d85SRichard Henderson .vece = MO_32 }, 174809a52d85SRichard Henderson { .fni8 = gen_saba_i64, 174909a52d85SRichard Henderson .fniv = gen_saba_vec, 175009a52d85SRichard Henderson .fno = gen_helper_gvec_saba_d, 175109a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 175209a52d85SRichard Henderson .opt_opc = vecop_list, 175309a52d85SRichard Henderson .load_dest = true, 175409a52d85SRichard Henderson .vece = MO_64 }, 175509a52d85SRichard Henderson }; 175609a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 175709a52d85SRichard Henderson } 175809a52d85SRichard Henderson 175909a52d85SRichard Henderson static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 176009a52d85SRichard Henderson { 176109a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 176209a52d85SRichard Henderson gen_uabd_i32(t, a, b); 176309a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 176409a52d85SRichard Henderson } 176509a52d85SRichard Henderson 176609a52d85SRichard Henderson static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 176709a52d85SRichard Henderson { 176809a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 176909a52d85SRichard Henderson gen_uabd_i64(t, a, b); 177009a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 177109a52d85SRichard Henderson } 177209a52d85SRichard Henderson 177309a52d85SRichard Henderson static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 177409a52d85SRichard Henderson { 177509a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 177609a52d85SRichard Henderson gen_uabd_vec(vece, t, a, b); 177709a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 177809a52d85SRichard Henderson } 177909a52d85SRichard Henderson 178009a52d85SRichard Henderson 
void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 178109a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 178209a52d85SRichard Henderson { 178309a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 178409a52d85SRichard Henderson INDEX_op_sub_vec, INDEX_op_add_vec, 178509a52d85SRichard Henderson INDEX_op_umin_vec, INDEX_op_umax_vec, 0 178609a52d85SRichard Henderson }; 178709a52d85SRichard Henderson static const GVecGen3 ops[4] = { 178809a52d85SRichard Henderson { .fniv = gen_uaba_vec, 178909a52d85SRichard Henderson .fno = gen_helper_gvec_uaba_b, 179009a52d85SRichard Henderson .opt_opc = vecop_list, 179109a52d85SRichard Henderson .load_dest = true, 179209a52d85SRichard Henderson .vece = MO_8 }, 179309a52d85SRichard Henderson { .fniv = gen_uaba_vec, 179409a52d85SRichard Henderson .fno = gen_helper_gvec_uaba_h, 179509a52d85SRichard Henderson .opt_opc = vecop_list, 179609a52d85SRichard Henderson .load_dest = true, 179709a52d85SRichard Henderson .vece = MO_16 }, 179809a52d85SRichard Henderson { .fni4 = gen_uaba_i32, 179909a52d85SRichard Henderson .fniv = gen_uaba_vec, 180009a52d85SRichard Henderson .fno = gen_helper_gvec_uaba_s, 180109a52d85SRichard Henderson .opt_opc = vecop_list, 180209a52d85SRichard Henderson .load_dest = true, 180309a52d85SRichard Henderson .vece = MO_32 }, 180409a52d85SRichard Henderson { .fni8 = gen_uaba_i64, 180509a52d85SRichard Henderson .fniv = gen_uaba_vec, 180609a52d85SRichard Henderson .fno = gen_helper_gvec_uaba_d, 180709a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 180809a52d85SRichard Henderson .opt_opc = vecop_list, 180909a52d85SRichard Henderson .load_dest = true, 181009a52d85SRichard Henderson .vece = MO_64 }, 181109a52d85SRichard Henderson }; 181209a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 181309a52d85SRichard Henderson } 1814a7e4eec6SRichard Henderson 1815a7e4eec6SRichard Henderson void gen_gvec_addp(unsigned 
vece, uint32_t rd_ofs, uint32_t rn_ofs, 1816a7e4eec6SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1817a7e4eec6SRichard Henderson { 1818a7e4eec6SRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 1819a7e4eec6SRichard Henderson gen_helper_gvec_addp_b, 1820a7e4eec6SRichard Henderson gen_helper_gvec_addp_h, 1821a7e4eec6SRichard Henderson gen_helper_gvec_addp_s, 1822a7e4eec6SRichard Henderson gen_helper_gvec_addp_d, 1823a7e4eec6SRichard Henderson }; 1824a7e4eec6SRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 1825a7e4eec6SRichard Henderson } 182628b5451bSRichard Henderson 182728b5451bSRichard Henderson void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 182828b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 182928b5451bSRichard Henderson { 183028b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 183128b5451bSRichard Henderson gen_helper_gvec_smaxp_b, 183228b5451bSRichard Henderson gen_helper_gvec_smaxp_h, 183328b5451bSRichard Henderson gen_helper_gvec_smaxp_s, 183428b5451bSRichard Henderson }; 183528b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 183628b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 183728b5451bSRichard Henderson } 183828b5451bSRichard Henderson 183928b5451bSRichard Henderson void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 184028b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 184128b5451bSRichard Henderson { 184228b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 184328b5451bSRichard Henderson gen_helper_gvec_sminp_b, 184428b5451bSRichard Henderson gen_helper_gvec_sminp_h, 184528b5451bSRichard Henderson gen_helper_gvec_sminp_s, 184628b5451bSRichard Henderson }; 184728b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 184828b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, 
rm_ofs, opr_sz, max_sz, 0, fns[vece]); 184928b5451bSRichard Henderson } 185028b5451bSRichard Henderson 185128b5451bSRichard Henderson void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 185228b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 185328b5451bSRichard Henderson { 185428b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 185528b5451bSRichard Henderson gen_helper_gvec_umaxp_b, 185628b5451bSRichard Henderson gen_helper_gvec_umaxp_h, 185728b5451bSRichard Henderson gen_helper_gvec_umaxp_s, 185828b5451bSRichard Henderson }; 185928b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 186028b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 186128b5451bSRichard Henderson } 186228b5451bSRichard Henderson 186328b5451bSRichard Henderson void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 186428b5451bSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 186528b5451bSRichard Henderson { 186628b5451bSRichard Henderson static gen_helper_gvec_3 * const fns[4] = { 186728b5451bSRichard Henderson gen_helper_gvec_uminp_b, 186828b5451bSRichard Henderson gen_helper_gvec_uminp_h, 186928b5451bSRichard Henderson gen_helper_gvec_uminp_s, 187028b5451bSRichard Henderson }; 187128b5451bSRichard Henderson tcg_debug_assert(vece <= MO_32); 187228b5451bSRichard Henderson tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); 187328b5451bSRichard Henderson } 1874203aca91SRichard Henderson 1875203aca91SRichard Henderson static void gen_shadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 1876203aca91SRichard Henderson { 1877203aca91SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 1878203aca91SRichard Henderson 1879203aca91SRichard Henderson tcg_gen_and_i64(t, a, b); 1880203aca91SRichard Henderson tcg_gen_vec_sar8i_i64(a, a, 1); 1881203aca91SRichard Henderson tcg_gen_vec_sar8i_i64(b, b, 1); 1882203aca91SRichard Henderson 
tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 1883203aca91SRichard Henderson tcg_gen_vec_add8_i64(d, a, b); 1884203aca91SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 1885203aca91SRichard Henderson } 1886203aca91SRichard Henderson 1887203aca91SRichard Henderson static void gen_shadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 1888203aca91SRichard Henderson { 1889203aca91SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 1890203aca91SRichard Henderson 1891203aca91SRichard Henderson tcg_gen_and_i64(t, a, b); 1892203aca91SRichard Henderson tcg_gen_vec_sar16i_i64(a, a, 1); 1893203aca91SRichard Henderson tcg_gen_vec_sar16i_i64(b, b, 1); 1894203aca91SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 1895203aca91SRichard Henderson tcg_gen_vec_add16_i64(d, a, b); 1896203aca91SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 1897203aca91SRichard Henderson } 1898203aca91SRichard Henderson 1899203aca91SRichard Henderson static void gen_shadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 1900203aca91SRichard Henderson { 1901203aca91SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 1902203aca91SRichard Henderson 1903203aca91SRichard Henderson tcg_gen_and_i32(t, a, b); 1904203aca91SRichard Henderson tcg_gen_sari_i32(a, a, 1); 1905203aca91SRichard Henderson tcg_gen_sari_i32(b, b, 1); 1906203aca91SRichard Henderson tcg_gen_andi_i32(t, t, 1); 1907203aca91SRichard Henderson tcg_gen_add_i32(d, a, b); 1908203aca91SRichard Henderson tcg_gen_add_i32(d, d, t); 1909203aca91SRichard Henderson } 1910203aca91SRichard Henderson 1911203aca91SRichard Henderson static void gen_shadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 1912203aca91SRichard Henderson { 1913203aca91SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 1914203aca91SRichard Henderson 1915203aca91SRichard Henderson tcg_gen_and_vec(vece, t, a, b); 1916203aca91SRichard Henderson tcg_gen_sari_vec(vece, a, a, 1); 1917203aca91SRichard Henderson tcg_gen_sari_vec(vece, b, b, 1); 1918203aca91SRichard 
Henderson tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); 1919203aca91SRichard Henderson tcg_gen_add_vec(vece, d, a, b); 1920203aca91SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 1921203aca91SRichard Henderson } 1922203aca91SRichard Henderson 1923203aca91SRichard Henderson void gen_gvec_shadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1924203aca91SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1925203aca91SRichard Henderson { 1926203aca91SRichard Henderson static const TCGOpcode vecop_list[] = { 1927203aca91SRichard Henderson INDEX_op_sari_vec, INDEX_op_add_vec, 0 1928203aca91SRichard Henderson }; 1929203aca91SRichard Henderson static const GVecGen3 g[] = { 1930203aca91SRichard Henderson { .fni8 = gen_shadd8_i64, 1931203aca91SRichard Henderson .fniv = gen_shadd_vec, 1932203aca91SRichard Henderson .opt_opc = vecop_list, 1933203aca91SRichard Henderson .vece = MO_8 }, 1934203aca91SRichard Henderson { .fni8 = gen_shadd16_i64, 1935203aca91SRichard Henderson .fniv = gen_shadd_vec, 1936203aca91SRichard Henderson .opt_opc = vecop_list, 1937203aca91SRichard Henderson .vece = MO_16 }, 1938203aca91SRichard Henderson { .fni4 = gen_shadd_i32, 1939203aca91SRichard Henderson .fniv = gen_shadd_vec, 1940203aca91SRichard Henderson .opt_opc = vecop_list, 1941203aca91SRichard Henderson .vece = MO_32 }, 1942203aca91SRichard Henderson }; 1943203aca91SRichard Henderson tcg_debug_assert(vece <= MO_32); 1944203aca91SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); 1945203aca91SRichard Henderson } 1946203aca91SRichard Henderson 1947203aca91SRichard Henderson static void gen_uhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 1948203aca91SRichard Henderson { 1949203aca91SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 1950203aca91SRichard Henderson 1951203aca91SRichard Henderson tcg_gen_and_i64(t, a, b); 1952203aca91SRichard Henderson tcg_gen_vec_shr8i_i64(a, a, 1); 1953203aca91SRichard Henderson 
tcg_gen_vec_shr8i_i64(b, b, 1); 1954203aca91SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 1955203aca91SRichard Henderson tcg_gen_vec_add8_i64(d, a, b); 1956203aca91SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 1957203aca91SRichard Henderson } 1958203aca91SRichard Henderson 1959203aca91SRichard Henderson static void gen_uhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 1960203aca91SRichard Henderson { 1961203aca91SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 1962203aca91SRichard Henderson 1963203aca91SRichard Henderson tcg_gen_and_i64(t, a, b); 1964203aca91SRichard Henderson tcg_gen_vec_shr16i_i64(a, a, 1); 1965203aca91SRichard Henderson tcg_gen_vec_shr16i_i64(b, b, 1); 1966203aca91SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 1967203aca91SRichard Henderson tcg_gen_vec_add16_i64(d, a, b); 1968203aca91SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 1969203aca91SRichard Henderson } 1970203aca91SRichard Henderson 1971203aca91SRichard Henderson static void gen_uhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 1972203aca91SRichard Henderson { 1973203aca91SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 1974203aca91SRichard Henderson 1975203aca91SRichard Henderson tcg_gen_and_i32(t, a, b); 1976203aca91SRichard Henderson tcg_gen_shri_i32(a, a, 1); 1977203aca91SRichard Henderson tcg_gen_shri_i32(b, b, 1); 1978203aca91SRichard Henderson tcg_gen_andi_i32(t, t, 1); 1979203aca91SRichard Henderson tcg_gen_add_i32(d, a, b); 1980203aca91SRichard Henderson tcg_gen_add_i32(d, d, t); 1981203aca91SRichard Henderson } 1982203aca91SRichard Henderson 1983203aca91SRichard Henderson static void gen_uhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 1984203aca91SRichard Henderson { 1985203aca91SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 1986203aca91SRichard Henderson 1987203aca91SRichard Henderson tcg_gen_and_vec(vece, t, a, b); 1988203aca91SRichard Henderson tcg_gen_shri_vec(vece, a, a, 1); 1989203aca91SRichard 
Henderson tcg_gen_shri_vec(vece, b, b, 1); 1990203aca91SRichard Henderson tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); 1991203aca91SRichard Henderson tcg_gen_add_vec(vece, d, a, b); 1992203aca91SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 1993203aca91SRichard Henderson } 1994203aca91SRichard Henderson 1995203aca91SRichard Henderson void gen_gvec_uhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 1996203aca91SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 1997203aca91SRichard Henderson { 1998203aca91SRichard Henderson static const TCGOpcode vecop_list[] = { 1999203aca91SRichard Henderson INDEX_op_shri_vec, INDEX_op_add_vec, 0 2000203aca91SRichard Henderson }; 2001203aca91SRichard Henderson static const GVecGen3 g[] = { 2002203aca91SRichard Henderson { .fni8 = gen_uhadd8_i64, 2003203aca91SRichard Henderson .fniv = gen_uhadd_vec, 2004203aca91SRichard Henderson .opt_opc = vecop_list, 2005203aca91SRichard Henderson .vece = MO_8 }, 2006203aca91SRichard Henderson { .fni8 = gen_uhadd16_i64, 2007203aca91SRichard Henderson .fniv = gen_uhadd_vec, 2008203aca91SRichard Henderson .opt_opc = vecop_list, 2009203aca91SRichard Henderson .vece = MO_16 }, 2010203aca91SRichard Henderson { .fni4 = gen_uhadd_i32, 2011203aca91SRichard Henderson .fniv = gen_uhadd_vec, 2012203aca91SRichard Henderson .opt_opc = vecop_list, 2013203aca91SRichard Henderson .vece = MO_32 }, 2014203aca91SRichard Henderson }; 2015203aca91SRichard Henderson tcg_debug_assert(vece <= MO_32); 2016203aca91SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); 2017203aca91SRichard Henderson } 201834c0d865SRichard Henderson 201934c0d865SRichard Henderson static void gen_shsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 202034c0d865SRichard Henderson { 202134c0d865SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 202234c0d865SRichard Henderson 202334c0d865SRichard Henderson tcg_gen_andc_i64(t, b, a); 202434c0d865SRichard Henderson 
tcg_gen_vec_sar8i_i64(a, a, 1); 202534c0d865SRichard Henderson tcg_gen_vec_sar8i_i64(b, b, 1); 202634c0d865SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 202734c0d865SRichard Henderson tcg_gen_vec_sub8_i64(d, a, b); 202834c0d865SRichard Henderson tcg_gen_vec_sub8_i64(d, d, t); 202934c0d865SRichard Henderson } 203034c0d865SRichard Henderson 203134c0d865SRichard Henderson static void gen_shsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 203234c0d865SRichard Henderson { 203334c0d865SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 203434c0d865SRichard Henderson 203534c0d865SRichard Henderson tcg_gen_andc_i64(t, b, a); 203634c0d865SRichard Henderson tcg_gen_vec_sar16i_i64(a, a, 1); 203734c0d865SRichard Henderson tcg_gen_vec_sar16i_i64(b, b, 1); 203834c0d865SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 203934c0d865SRichard Henderson tcg_gen_vec_sub16_i64(d, a, b); 204034c0d865SRichard Henderson tcg_gen_vec_sub16_i64(d, d, t); 204134c0d865SRichard Henderson } 204234c0d865SRichard Henderson 204334c0d865SRichard Henderson static void gen_shsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 204434c0d865SRichard Henderson { 204534c0d865SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 204634c0d865SRichard Henderson 204734c0d865SRichard Henderson tcg_gen_andc_i32(t, b, a); 204834c0d865SRichard Henderson tcg_gen_sari_i32(a, a, 1); 204934c0d865SRichard Henderson tcg_gen_sari_i32(b, b, 1); 205034c0d865SRichard Henderson tcg_gen_andi_i32(t, t, 1); 205134c0d865SRichard Henderson tcg_gen_sub_i32(d, a, b); 205234c0d865SRichard Henderson tcg_gen_sub_i32(d, d, t); 205334c0d865SRichard Henderson } 205434c0d865SRichard Henderson 205534c0d865SRichard Henderson static void gen_shsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 205634c0d865SRichard Henderson { 205734c0d865SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 205834c0d865SRichard Henderson 205934c0d865SRichard Henderson tcg_gen_andc_vec(vece, t, b, a); 206034c0d865SRichard 
Henderson tcg_gen_sari_vec(vece, a, a, 1); 206134c0d865SRichard Henderson tcg_gen_sari_vec(vece, b, b, 1); 206234c0d865SRichard Henderson tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); 206334c0d865SRichard Henderson tcg_gen_sub_vec(vece, d, a, b); 206434c0d865SRichard Henderson tcg_gen_sub_vec(vece, d, d, t); 206534c0d865SRichard Henderson } 206634c0d865SRichard Henderson 206734c0d865SRichard Henderson void gen_gvec_shsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 206834c0d865SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 206934c0d865SRichard Henderson { 207034c0d865SRichard Henderson static const TCGOpcode vecop_list[] = { 207134c0d865SRichard Henderson INDEX_op_sari_vec, INDEX_op_sub_vec, 0 207234c0d865SRichard Henderson }; 207334c0d865SRichard Henderson static const GVecGen3 g[4] = { 207434c0d865SRichard Henderson { .fni8 = gen_shsub8_i64, 207534c0d865SRichard Henderson .fniv = gen_shsub_vec, 207634c0d865SRichard Henderson .opt_opc = vecop_list, 207734c0d865SRichard Henderson .vece = MO_8 }, 207834c0d865SRichard Henderson { .fni8 = gen_shsub16_i64, 207934c0d865SRichard Henderson .fniv = gen_shsub_vec, 208034c0d865SRichard Henderson .opt_opc = vecop_list, 208134c0d865SRichard Henderson .vece = MO_16 }, 208234c0d865SRichard Henderson { .fni4 = gen_shsub_i32, 208334c0d865SRichard Henderson .fniv = gen_shsub_vec, 208434c0d865SRichard Henderson .opt_opc = vecop_list, 208534c0d865SRichard Henderson .vece = MO_32 }, 208634c0d865SRichard Henderson }; 208734c0d865SRichard Henderson assert(vece <= MO_32); 208834c0d865SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); 208934c0d865SRichard Henderson } 209034c0d865SRichard Henderson 209134c0d865SRichard Henderson static void gen_uhsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 209234c0d865SRichard Henderson { 209334c0d865SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 209434c0d865SRichard Henderson 209534c0d865SRichard Henderson 
tcg_gen_andc_i64(t, b, a); 209634c0d865SRichard Henderson tcg_gen_vec_shr8i_i64(a, a, 1); 209734c0d865SRichard Henderson tcg_gen_vec_shr8i_i64(b, b, 1); 209834c0d865SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 209934c0d865SRichard Henderson tcg_gen_vec_sub8_i64(d, a, b); 210034c0d865SRichard Henderson tcg_gen_vec_sub8_i64(d, d, t); 210134c0d865SRichard Henderson } 210234c0d865SRichard Henderson 210334c0d865SRichard Henderson static void gen_uhsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 210434c0d865SRichard Henderson { 210534c0d865SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 210634c0d865SRichard Henderson 210734c0d865SRichard Henderson tcg_gen_andc_i64(t, b, a); 210834c0d865SRichard Henderson tcg_gen_vec_shr16i_i64(a, a, 1); 210934c0d865SRichard Henderson tcg_gen_vec_shr16i_i64(b, b, 1); 211034c0d865SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 211134c0d865SRichard Henderson tcg_gen_vec_sub16_i64(d, a, b); 211234c0d865SRichard Henderson tcg_gen_vec_sub16_i64(d, d, t); 211334c0d865SRichard Henderson } 211434c0d865SRichard Henderson 211534c0d865SRichard Henderson static void gen_uhsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 211634c0d865SRichard Henderson { 211734c0d865SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 211834c0d865SRichard Henderson 211934c0d865SRichard Henderson tcg_gen_andc_i32(t, b, a); 212034c0d865SRichard Henderson tcg_gen_shri_i32(a, a, 1); 212134c0d865SRichard Henderson tcg_gen_shri_i32(b, b, 1); 212234c0d865SRichard Henderson tcg_gen_andi_i32(t, t, 1); 212334c0d865SRichard Henderson tcg_gen_sub_i32(d, a, b); 212434c0d865SRichard Henderson tcg_gen_sub_i32(d, d, t); 212534c0d865SRichard Henderson } 212634c0d865SRichard Henderson 212734c0d865SRichard Henderson static void gen_uhsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 212834c0d865SRichard Henderson { 212934c0d865SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 213034c0d865SRichard Henderson 213134c0d865SRichard Henderson 
tcg_gen_andc_vec(vece, t, b, a); 213234c0d865SRichard Henderson tcg_gen_shri_vec(vece, a, a, 1); 213334c0d865SRichard Henderson tcg_gen_shri_vec(vece, b, b, 1); 213434c0d865SRichard Henderson tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); 213534c0d865SRichard Henderson tcg_gen_sub_vec(vece, d, a, b); 213634c0d865SRichard Henderson tcg_gen_sub_vec(vece, d, d, t); 213734c0d865SRichard Henderson } 213834c0d865SRichard Henderson 213934c0d865SRichard Henderson void gen_gvec_uhsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 214034c0d865SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 214134c0d865SRichard Henderson { 214234c0d865SRichard Henderson static const TCGOpcode vecop_list[] = { 214334c0d865SRichard Henderson INDEX_op_shri_vec, INDEX_op_sub_vec, 0 214434c0d865SRichard Henderson }; 214534c0d865SRichard Henderson static const GVecGen3 g[4] = { 214634c0d865SRichard Henderson { .fni8 = gen_uhsub8_i64, 214734c0d865SRichard Henderson .fniv = gen_uhsub_vec, 214834c0d865SRichard Henderson .opt_opc = vecop_list, 214934c0d865SRichard Henderson .vece = MO_8 }, 215034c0d865SRichard Henderson { .fni8 = gen_uhsub16_i64, 215134c0d865SRichard Henderson .fniv = gen_uhsub_vec, 215234c0d865SRichard Henderson .opt_opc = vecop_list, 215334c0d865SRichard Henderson .vece = MO_16 }, 215434c0d865SRichard Henderson { .fni4 = gen_uhsub_i32, 215534c0d865SRichard Henderson .fniv = gen_uhsub_vec, 215634c0d865SRichard Henderson .opt_opc = vecop_list, 215734c0d865SRichard Henderson .vece = MO_32 }, 215834c0d865SRichard Henderson }; 215934c0d865SRichard Henderson assert(vece <= MO_32); 216034c0d865SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); 216134c0d865SRichard Henderson } 21628989b95eSRichard Henderson 21638989b95eSRichard Henderson static void gen_srhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 21648989b95eSRichard Henderson { 21658989b95eSRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 
21668989b95eSRichard Henderson 21678989b95eSRichard Henderson tcg_gen_or_i64(t, a, b); 21688989b95eSRichard Henderson tcg_gen_vec_sar8i_i64(a, a, 1); 21698989b95eSRichard Henderson tcg_gen_vec_sar8i_i64(b, b, 1); 21708989b95eSRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 21718989b95eSRichard Henderson tcg_gen_vec_add8_i64(d, a, b); 21728989b95eSRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 21738989b95eSRichard Henderson } 21748989b95eSRichard Henderson 21758989b95eSRichard Henderson static void gen_srhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 21768989b95eSRichard Henderson { 21778989b95eSRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 21788989b95eSRichard Henderson 21798989b95eSRichard Henderson tcg_gen_or_i64(t, a, b); 21808989b95eSRichard Henderson tcg_gen_vec_sar16i_i64(a, a, 1); 21818989b95eSRichard Henderson tcg_gen_vec_sar16i_i64(b, b, 1); 21828989b95eSRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 21838989b95eSRichard Henderson tcg_gen_vec_add16_i64(d, a, b); 21848989b95eSRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 21858989b95eSRichard Henderson } 21868989b95eSRichard Henderson 21878989b95eSRichard Henderson static void gen_srhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 21888989b95eSRichard Henderson { 21898989b95eSRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 21908989b95eSRichard Henderson 21918989b95eSRichard Henderson tcg_gen_or_i32(t, a, b); 21928989b95eSRichard Henderson tcg_gen_sari_i32(a, a, 1); 21938989b95eSRichard Henderson tcg_gen_sari_i32(b, b, 1); 21948989b95eSRichard Henderson tcg_gen_andi_i32(t, t, 1); 21958989b95eSRichard Henderson tcg_gen_add_i32(d, a, b); 21968989b95eSRichard Henderson tcg_gen_add_i32(d, d, t); 21978989b95eSRichard Henderson } 21988989b95eSRichard Henderson 21998989b95eSRichard Henderson static void gen_srhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 22008989b95eSRichard Henderson { 22018989b95eSRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 
22028989b95eSRichard Henderson 22038989b95eSRichard Henderson tcg_gen_or_vec(vece, t, a, b); 22048989b95eSRichard Henderson tcg_gen_sari_vec(vece, a, a, 1); 22058989b95eSRichard Henderson tcg_gen_sari_vec(vece, b, b, 1); 22068989b95eSRichard Henderson tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); 22078989b95eSRichard Henderson tcg_gen_add_vec(vece, d, a, b); 22088989b95eSRichard Henderson tcg_gen_add_vec(vece, d, d, t); 22098989b95eSRichard Henderson } 22108989b95eSRichard Henderson 22118989b95eSRichard Henderson void gen_gvec_srhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 22128989b95eSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 22138989b95eSRichard Henderson { 22148989b95eSRichard Henderson static const TCGOpcode vecop_list[] = { 22158989b95eSRichard Henderson INDEX_op_sari_vec, INDEX_op_add_vec, 0 22168989b95eSRichard Henderson }; 22178989b95eSRichard Henderson static const GVecGen3 g[] = { 22188989b95eSRichard Henderson { .fni8 = gen_srhadd8_i64, 22198989b95eSRichard Henderson .fniv = gen_srhadd_vec, 22208989b95eSRichard Henderson .opt_opc = vecop_list, 22218989b95eSRichard Henderson .vece = MO_8 }, 22228989b95eSRichard Henderson { .fni8 = gen_srhadd16_i64, 22238989b95eSRichard Henderson .fniv = gen_srhadd_vec, 22248989b95eSRichard Henderson .opt_opc = vecop_list, 22258989b95eSRichard Henderson .vece = MO_16 }, 22268989b95eSRichard Henderson { .fni4 = gen_srhadd_i32, 22278989b95eSRichard Henderson .fniv = gen_srhadd_vec, 22288989b95eSRichard Henderson .opt_opc = vecop_list, 22298989b95eSRichard Henderson .vece = MO_32 }, 22308989b95eSRichard Henderson }; 22318989b95eSRichard Henderson assert(vece <= MO_32); 22328989b95eSRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); 22338989b95eSRichard Henderson } 22348989b95eSRichard Henderson 22358989b95eSRichard Henderson static void gen_urhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 22368989b95eSRichard Henderson { 
22378989b95eSRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 22388989b95eSRichard Henderson 22398989b95eSRichard Henderson tcg_gen_or_i64(t, a, b); 22408989b95eSRichard Henderson tcg_gen_vec_shr8i_i64(a, a, 1); 22418989b95eSRichard Henderson tcg_gen_vec_shr8i_i64(b, b, 1); 22428989b95eSRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 22438989b95eSRichard Henderson tcg_gen_vec_add8_i64(d, a, b); 22448989b95eSRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 22458989b95eSRichard Henderson } 22468989b95eSRichard Henderson 22478989b95eSRichard Henderson static void gen_urhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 22488989b95eSRichard Henderson { 22498989b95eSRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 22508989b95eSRichard Henderson 22518989b95eSRichard Henderson tcg_gen_or_i64(t, a, b); 22528989b95eSRichard Henderson tcg_gen_vec_shr16i_i64(a, a, 1); 22538989b95eSRichard Henderson tcg_gen_vec_shr16i_i64(b, b, 1); 22548989b95eSRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 22558989b95eSRichard Henderson tcg_gen_vec_add16_i64(d, a, b); 22568989b95eSRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 22578989b95eSRichard Henderson } 22588989b95eSRichard Henderson 22598989b95eSRichard Henderson static void gen_urhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 22608989b95eSRichard Henderson { 22618989b95eSRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 22628989b95eSRichard Henderson 22638989b95eSRichard Henderson tcg_gen_or_i32(t, a, b); 22648989b95eSRichard Henderson tcg_gen_shri_i32(a, a, 1); 22658989b95eSRichard Henderson tcg_gen_shri_i32(b, b, 1); 22668989b95eSRichard Henderson tcg_gen_andi_i32(t, t, 1); 22678989b95eSRichard Henderson tcg_gen_add_i32(d, a, b); 22688989b95eSRichard Henderson tcg_gen_add_i32(d, d, t); 22698989b95eSRichard Henderson } 22708989b95eSRichard Henderson 22718989b95eSRichard Henderson static void gen_urhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 22728989b95eSRichard Henderson { 
22738989b95eSRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 22748989b95eSRichard Henderson 22758989b95eSRichard Henderson tcg_gen_or_vec(vece, t, a, b); 22768989b95eSRichard Henderson tcg_gen_shri_vec(vece, a, a, 1); 22778989b95eSRichard Henderson tcg_gen_shri_vec(vece, b, b, 1); 22788989b95eSRichard Henderson tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); 22798989b95eSRichard Henderson tcg_gen_add_vec(vece, d, a, b); 22808989b95eSRichard Henderson tcg_gen_add_vec(vece, d, d, t); 22818989b95eSRichard Henderson } 22828989b95eSRichard Henderson 22838989b95eSRichard Henderson void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 22848989b95eSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 22858989b95eSRichard Henderson { 22868989b95eSRichard Henderson static const TCGOpcode vecop_list[] = { 22878989b95eSRichard Henderson INDEX_op_shri_vec, INDEX_op_add_vec, 0 22888989b95eSRichard Henderson }; 22898989b95eSRichard Henderson static const GVecGen3 g[] = { 22908989b95eSRichard Henderson { .fni8 = gen_urhadd8_i64, 22918989b95eSRichard Henderson .fniv = gen_urhadd_vec, 22928989b95eSRichard Henderson .opt_opc = vecop_list, 22938989b95eSRichard Henderson .vece = MO_8 }, 22948989b95eSRichard Henderson { .fni8 = gen_urhadd16_i64, 22958989b95eSRichard Henderson .fniv = gen_urhadd_vec, 22968989b95eSRichard Henderson .opt_opc = vecop_list, 22978989b95eSRichard Henderson .vece = MO_16 }, 22988989b95eSRichard Henderson { .fni4 = gen_urhadd_i32, 22998989b95eSRichard Henderson .fniv = gen_urhadd_vec, 23008989b95eSRichard Henderson .opt_opc = vecop_list, 23018989b95eSRichard Henderson .vece = MO_32 }, 23028989b95eSRichard Henderson }; 23038989b95eSRichard Henderson assert(vece <= MO_32); 23048989b95eSRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); 23058989b95eSRichard Henderson } 2306