/*
 * ARM generic vector expansion
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
2009a52d85SRichard Henderson */ 2109a52d85SRichard Henderson 2209a52d85SRichard Henderson #include "qemu/osdep.h" 2309a52d85SRichard Henderson #include "translate.h" 2409a52d85SRichard Henderson 2509a52d85SRichard Henderson 2609a52d85SRichard Henderson static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, 2709a52d85SRichard Henderson uint32_t opr_sz, uint32_t max_sz, 2809a52d85SRichard Henderson gen_helper_gvec_3_ptr *fn) 2909a52d85SRichard Henderson { 3009a52d85SRichard Henderson TCGv_ptr qc_ptr = tcg_temp_new_ptr(); 3109a52d85SRichard Henderson 3201d5665bSRichard Henderson tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); 3309a52d85SRichard Henderson tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); 3409a52d85SRichard Henderson tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr, 3509a52d85SRichard Henderson opr_sz, max_sz, 0, fn); 3609a52d85SRichard Henderson } 3709a52d85SRichard Henderson 388f81dcedSRichard Henderson void gen_gvec_sqdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 398f81dcedSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 408f81dcedSRichard Henderson { 418f81dcedSRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 428f81dcedSRichard Henderson gen_helper_neon_sqdmulh_h, gen_helper_neon_sqdmulh_s 438f81dcedSRichard Henderson }; 448f81dcedSRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 458f81dcedSRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 468f81dcedSRichard Henderson } 478f81dcedSRichard Henderson 488f81dcedSRichard Henderson void gen_gvec_sqrdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 498f81dcedSRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 508f81dcedSRichard Henderson { 518f81dcedSRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 528f81dcedSRichard Henderson gen_helper_neon_sqrdmulh_h, gen_helper_neon_sqrdmulh_s 538f81dcedSRichard Henderson 
}; 548f81dcedSRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 558f81dcedSRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 568f81dcedSRichard Henderson } 578f81dcedSRichard Henderson 5809a52d85SRichard Henderson void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 5909a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 6009a52d85SRichard Henderson { 6109a52d85SRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 6209a52d85SRichard Henderson gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32 6309a52d85SRichard Henderson }; 6409a52d85SRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 6509a52d85SRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 6609a52d85SRichard Henderson } 6709a52d85SRichard Henderson 6809a52d85SRichard Henderson void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 6909a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 7009a52d85SRichard Henderson { 7109a52d85SRichard Henderson static gen_helper_gvec_3_ptr * const fns[2] = { 7209a52d85SRichard Henderson gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32 7309a52d85SRichard Henderson }; 7409a52d85SRichard Henderson tcg_debug_assert(vece >= 1 && vece <= 2); 7509a52d85SRichard Henderson gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); 7609a52d85SRichard Henderson } 7709a52d85SRichard Henderson 7809a52d85SRichard Henderson #define GEN_CMP0(NAME, COND) \ 7909a52d85SRichard Henderson void NAME(unsigned vece, uint32_t d, uint32_t m, \ 8009a52d85SRichard Henderson uint32_t opr_sz, uint32_t max_sz) \ 8109a52d85SRichard Henderson { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); } 8209a52d85SRichard Henderson 8309a52d85SRichard Henderson GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ) 8409a52d85SRichard Henderson GEN_CMP0(gen_gvec_cle0, TCG_COND_LE) 8509a52d85SRichard 
Henderson GEN_CMP0(gen_gvec_cge0, TCG_COND_GE) 8609a52d85SRichard Henderson GEN_CMP0(gen_gvec_clt0, TCG_COND_LT) 8709a52d85SRichard Henderson GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT) 8809a52d85SRichard Henderson 8909a52d85SRichard Henderson #undef GEN_CMP0 9009a52d85SRichard Henderson 91*00bcab5bSRichard Henderson void gen_gvec_sshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 92*00bcab5bSRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 93*00bcab5bSRichard Henderson { 94*00bcab5bSRichard Henderson /* Signed shift out of range results in all-sign-bits */ 95*00bcab5bSRichard Henderson shift = MIN(shift, (8 << vece) - 1); 96*00bcab5bSRichard Henderson tcg_gen_gvec_sari(vece, rd_ofs, rm_ofs, shift, opr_sz, max_sz); 97*00bcab5bSRichard Henderson } 98*00bcab5bSRichard Henderson 99*00bcab5bSRichard Henderson void gen_gvec_ushr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 100*00bcab5bSRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 101*00bcab5bSRichard Henderson { 102*00bcab5bSRichard Henderson /* Unsigned shift out of range results in all-zero-bits */ 103*00bcab5bSRichard Henderson if (shift >= (8 << vece)) { 104*00bcab5bSRichard Henderson tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0); 105*00bcab5bSRichard Henderson } else { 106*00bcab5bSRichard Henderson tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift, opr_sz, max_sz); 107*00bcab5bSRichard Henderson } 108*00bcab5bSRichard Henderson } 109*00bcab5bSRichard Henderson 11009a52d85SRichard Henderson static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 11109a52d85SRichard Henderson { 11209a52d85SRichard Henderson tcg_gen_vec_sar8i_i64(a, a, shift); 11309a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, a); 11409a52d85SRichard Henderson } 11509a52d85SRichard Henderson 11609a52d85SRichard Henderson static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 11709a52d85SRichard Henderson { 11809a52d85SRichard Henderson tcg_gen_vec_sar16i_i64(a, a, shift); 
11909a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, a); 12009a52d85SRichard Henderson } 12109a52d85SRichard Henderson 12209a52d85SRichard Henderson static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 12309a52d85SRichard Henderson { 12409a52d85SRichard Henderson tcg_gen_sari_i32(a, a, shift); 12509a52d85SRichard Henderson tcg_gen_add_i32(d, d, a); 12609a52d85SRichard Henderson } 12709a52d85SRichard Henderson 12809a52d85SRichard Henderson static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 12909a52d85SRichard Henderson { 13009a52d85SRichard Henderson tcg_gen_sari_i64(a, a, shift); 13109a52d85SRichard Henderson tcg_gen_add_i64(d, d, a); 13209a52d85SRichard Henderson } 13309a52d85SRichard Henderson 13409a52d85SRichard Henderson static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 13509a52d85SRichard Henderson { 13609a52d85SRichard Henderson tcg_gen_sari_vec(vece, a, a, sh); 13709a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, a); 13809a52d85SRichard Henderson } 13909a52d85SRichard Henderson 14009a52d85SRichard Henderson void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 14109a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 14209a52d85SRichard Henderson { 14309a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 14409a52d85SRichard Henderson INDEX_op_sari_vec, INDEX_op_add_vec, 0 14509a52d85SRichard Henderson }; 14609a52d85SRichard Henderson static const GVecGen2i ops[4] = { 14709a52d85SRichard Henderson { .fni8 = gen_ssra8_i64, 14809a52d85SRichard Henderson .fniv = gen_ssra_vec, 14909a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_b, 15009a52d85SRichard Henderson .load_dest = true, 15109a52d85SRichard Henderson .opt_opc = vecop_list, 15209a52d85SRichard Henderson .vece = MO_8 }, 15309a52d85SRichard Henderson { .fni8 = gen_ssra16_i64, 15409a52d85SRichard Henderson .fniv = gen_ssra_vec, 15509a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_h, 
15609a52d85SRichard Henderson .load_dest = true, 15709a52d85SRichard Henderson .opt_opc = vecop_list, 15809a52d85SRichard Henderson .vece = MO_16 }, 15909a52d85SRichard Henderson { .fni4 = gen_ssra32_i32, 16009a52d85SRichard Henderson .fniv = gen_ssra_vec, 16109a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_s, 16209a52d85SRichard Henderson .load_dest = true, 16309a52d85SRichard Henderson .opt_opc = vecop_list, 16409a52d85SRichard Henderson .vece = MO_32 }, 16509a52d85SRichard Henderson { .fni8 = gen_ssra64_i64, 16609a52d85SRichard Henderson .fniv = gen_ssra_vec, 16709a52d85SRichard Henderson .fno = gen_helper_gvec_ssra_d, 16809a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 16909a52d85SRichard Henderson .opt_opc = vecop_list, 17009a52d85SRichard Henderson .load_dest = true, 17109a52d85SRichard Henderson .vece = MO_64 }, 17209a52d85SRichard Henderson }; 17309a52d85SRichard Henderson 17409a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize]. */ 17509a52d85SRichard Henderson tcg_debug_assert(shift > 0); 17609a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 17709a52d85SRichard Henderson 17809a52d85SRichard Henderson /* 17909a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 18009a52d85SRichard Henderson * Signed results in all sign bits. 
18109a52d85SRichard Henderson */ 18209a52d85SRichard Henderson shift = MIN(shift, (8 << vece) - 1); 18309a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 18409a52d85SRichard Henderson } 18509a52d85SRichard Henderson 18609a52d85SRichard Henderson static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 18709a52d85SRichard Henderson { 18809a52d85SRichard Henderson tcg_gen_vec_shr8i_i64(a, a, shift); 18909a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, a); 19009a52d85SRichard Henderson } 19109a52d85SRichard Henderson 19209a52d85SRichard Henderson static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 19309a52d85SRichard Henderson { 19409a52d85SRichard Henderson tcg_gen_vec_shr16i_i64(a, a, shift); 19509a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, a); 19609a52d85SRichard Henderson } 19709a52d85SRichard Henderson 19809a52d85SRichard Henderson static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) 19909a52d85SRichard Henderson { 20009a52d85SRichard Henderson tcg_gen_shri_i32(a, a, shift); 20109a52d85SRichard Henderson tcg_gen_add_i32(d, d, a); 20209a52d85SRichard Henderson } 20309a52d85SRichard Henderson 20409a52d85SRichard Henderson static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) 20509a52d85SRichard Henderson { 20609a52d85SRichard Henderson tcg_gen_shri_i64(a, a, shift); 20709a52d85SRichard Henderson tcg_gen_add_i64(d, d, a); 20809a52d85SRichard Henderson } 20909a52d85SRichard Henderson 21009a52d85SRichard Henderson static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 21109a52d85SRichard Henderson { 21209a52d85SRichard Henderson tcg_gen_shri_vec(vece, a, a, sh); 21309a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, a); 21409a52d85SRichard Henderson } 21509a52d85SRichard Henderson 21609a52d85SRichard Henderson void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 21709a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, 
uint32_t max_sz) 21809a52d85SRichard Henderson { 21909a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 22009a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_add_vec, 0 22109a52d85SRichard Henderson }; 22209a52d85SRichard Henderson static const GVecGen2i ops[4] = { 22309a52d85SRichard Henderson { .fni8 = gen_usra8_i64, 22409a52d85SRichard Henderson .fniv = gen_usra_vec, 22509a52d85SRichard Henderson .fno = gen_helper_gvec_usra_b, 22609a52d85SRichard Henderson .load_dest = true, 22709a52d85SRichard Henderson .opt_opc = vecop_list, 22809a52d85SRichard Henderson .vece = MO_8, }, 22909a52d85SRichard Henderson { .fni8 = gen_usra16_i64, 23009a52d85SRichard Henderson .fniv = gen_usra_vec, 23109a52d85SRichard Henderson .fno = gen_helper_gvec_usra_h, 23209a52d85SRichard Henderson .load_dest = true, 23309a52d85SRichard Henderson .opt_opc = vecop_list, 23409a52d85SRichard Henderson .vece = MO_16, }, 23509a52d85SRichard Henderson { .fni4 = gen_usra32_i32, 23609a52d85SRichard Henderson .fniv = gen_usra_vec, 23709a52d85SRichard Henderson .fno = gen_helper_gvec_usra_s, 23809a52d85SRichard Henderson .load_dest = true, 23909a52d85SRichard Henderson .opt_opc = vecop_list, 24009a52d85SRichard Henderson .vece = MO_32, }, 24109a52d85SRichard Henderson { .fni8 = gen_usra64_i64, 24209a52d85SRichard Henderson .fniv = gen_usra_vec, 24309a52d85SRichard Henderson .fno = gen_helper_gvec_usra_d, 24409a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 24509a52d85SRichard Henderson .load_dest = true, 24609a52d85SRichard Henderson .opt_opc = vecop_list, 24709a52d85SRichard Henderson .vece = MO_64, }, 24809a52d85SRichard Henderson }; 24909a52d85SRichard Henderson 25009a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize]. 
*/ 25109a52d85SRichard Henderson tcg_debug_assert(shift > 0); 25209a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 25309a52d85SRichard Henderson 25409a52d85SRichard Henderson /* 25509a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 25609a52d85SRichard Henderson * Unsigned results in all zeros as input to accumulate: nop. 25709a52d85SRichard Henderson */ 25809a52d85SRichard Henderson if (shift < (8 << vece)) { 25909a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 26009a52d85SRichard Henderson } else { 26109a52d85SRichard Henderson /* Nop, but we do need to clear the tail. */ 26209a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 26309a52d85SRichard Henderson } 26409a52d85SRichard Henderson } 26509a52d85SRichard Henderson 26609a52d85SRichard Henderson /* 26709a52d85SRichard Henderson * Shift one less than the requested amount, and the low bit is 26809a52d85SRichard Henderson * the rounding bit. For the 8 and 16-bit operations, because we 26909a52d85SRichard Henderson * mask the low bit, we can perform a normal integer shift instead 27009a52d85SRichard Henderson * of a vector shift. 
27109a52d85SRichard Henderson */ 27209a52d85SRichard Henderson static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 27309a52d85SRichard Henderson { 27409a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 27509a52d85SRichard Henderson 27609a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 27709a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 27809a52d85SRichard Henderson tcg_gen_vec_sar8i_i64(d, a, sh); 27909a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 28009a52d85SRichard Henderson } 28109a52d85SRichard Henderson 28209a52d85SRichard Henderson static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 28309a52d85SRichard Henderson { 28409a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 28509a52d85SRichard Henderson 28609a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 28709a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 28809a52d85SRichard Henderson tcg_gen_vec_sar16i_i64(d, a, sh); 28909a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 29009a52d85SRichard Henderson } 29109a52d85SRichard Henderson 29209a52d85SRichard Henderson void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 29309a52d85SRichard Henderson { 29409a52d85SRichard Henderson TCGv_i32 t; 29509a52d85SRichard Henderson 29609a52d85SRichard Henderson /* Handle shift by the input size for the benefit of trans_SRSHR_ri */ 29709a52d85SRichard Henderson if (sh == 32) { 29809a52d85SRichard Henderson tcg_gen_movi_i32(d, 0); 29909a52d85SRichard Henderson return; 30009a52d85SRichard Henderson } 30109a52d85SRichard Henderson t = tcg_temp_new_i32(); 30209a52d85SRichard Henderson tcg_gen_extract_i32(t, a, sh - 1, 1); 30309a52d85SRichard Henderson tcg_gen_sari_i32(d, a, sh); 30409a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 30509a52d85SRichard Henderson } 30609a52d85SRichard Henderson 30709a52d85SRichard Henderson void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 30809a52d85SRichard Henderson { 
30909a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 31009a52d85SRichard Henderson 31109a52d85SRichard Henderson tcg_gen_extract_i64(t, a, sh - 1, 1); 31209a52d85SRichard Henderson tcg_gen_sari_i64(d, a, sh); 31309a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 31409a52d85SRichard Henderson } 31509a52d85SRichard Henderson 31609a52d85SRichard Henderson static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) 31709a52d85SRichard Henderson { 31809a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 319143e179cSRichard Henderson TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1); 32009a52d85SRichard Henderson 32109a52d85SRichard Henderson tcg_gen_shri_vec(vece, t, a, sh - 1); 32209a52d85SRichard Henderson tcg_gen_and_vec(vece, t, t, ones); 32309a52d85SRichard Henderson tcg_gen_sari_vec(vece, d, a, sh); 32409a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 32509a52d85SRichard Henderson } 32609a52d85SRichard Henderson 32709a52d85SRichard Henderson void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 32809a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 32909a52d85SRichard Henderson { 33009a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 33109a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 33209a52d85SRichard Henderson }; 33309a52d85SRichard Henderson static const GVecGen2i ops[4] = { 33409a52d85SRichard Henderson { .fni8 = gen_srshr8_i64, 33509a52d85SRichard Henderson .fniv = gen_srshr_vec, 33609a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_b, 33709a52d85SRichard Henderson .opt_opc = vecop_list, 33809a52d85SRichard Henderson .vece = MO_8 }, 33909a52d85SRichard Henderson { .fni8 = gen_srshr16_i64, 34009a52d85SRichard Henderson .fniv = gen_srshr_vec, 34109a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_h, 34209a52d85SRichard Henderson .opt_opc = vecop_list, 34309a52d85SRichard Henderson .vece = MO_16 }, 
34409a52d85SRichard Henderson { .fni4 = gen_srshr32_i32, 34509a52d85SRichard Henderson .fniv = gen_srshr_vec, 34609a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_s, 34709a52d85SRichard Henderson .opt_opc = vecop_list, 34809a52d85SRichard Henderson .vece = MO_32 }, 34909a52d85SRichard Henderson { .fni8 = gen_srshr64_i64, 35009a52d85SRichard Henderson .fniv = gen_srshr_vec, 35109a52d85SRichard Henderson .fno = gen_helper_gvec_srshr_d, 35209a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 35309a52d85SRichard Henderson .opt_opc = vecop_list, 35409a52d85SRichard Henderson .vece = MO_64 }, 35509a52d85SRichard Henderson }; 35609a52d85SRichard Henderson 35709a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 35809a52d85SRichard Henderson tcg_debug_assert(shift > 0); 35909a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 36009a52d85SRichard Henderson 36109a52d85SRichard Henderson if (shift == (8 << vece)) { 36209a52d85SRichard Henderson /* 36309a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 36409a52d85SRichard Henderson * Signed results in all sign bits. With rounding, this produces 36509a52d85SRichard Henderson * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. 36609a52d85SRichard Henderson * I.e. always zero. 
36709a52d85SRichard Henderson */ 36809a52d85SRichard Henderson tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0); 36909a52d85SRichard Henderson } else { 37009a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 37109a52d85SRichard Henderson } 37209a52d85SRichard Henderson } 37309a52d85SRichard Henderson 37409a52d85SRichard Henderson static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 37509a52d85SRichard Henderson { 37609a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 37709a52d85SRichard Henderson 37809a52d85SRichard Henderson gen_srshr8_i64(t, a, sh); 37909a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 38009a52d85SRichard Henderson } 38109a52d85SRichard Henderson 38209a52d85SRichard Henderson static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 38309a52d85SRichard Henderson { 38409a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 38509a52d85SRichard Henderson 38609a52d85SRichard Henderson gen_srshr16_i64(t, a, sh); 38709a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 38809a52d85SRichard Henderson } 38909a52d85SRichard Henderson 39009a52d85SRichard Henderson static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 39109a52d85SRichard Henderson { 39209a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 39309a52d85SRichard Henderson 39409a52d85SRichard Henderson gen_srshr32_i32(t, a, sh); 39509a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 39609a52d85SRichard Henderson } 39709a52d85SRichard Henderson 39809a52d85SRichard Henderson static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 39909a52d85SRichard Henderson { 40009a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 40109a52d85SRichard Henderson 40209a52d85SRichard Henderson gen_srshr64_i64(t, a, sh); 40309a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 40409a52d85SRichard Henderson } 40509a52d85SRichard Henderson 40609a52d85SRichard Henderson static void gen_srsra_vec(unsigned 
vece, TCGv_vec d, TCGv_vec a, int64_t sh) 40709a52d85SRichard Henderson { 40809a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 40909a52d85SRichard Henderson 41009a52d85SRichard Henderson gen_srshr_vec(vece, t, a, sh); 41109a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 41209a52d85SRichard Henderson } 41309a52d85SRichard Henderson 41409a52d85SRichard Henderson void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 41509a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 41609a52d85SRichard Henderson { 41709a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 41809a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 41909a52d85SRichard Henderson }; 42009a52d85SRichard Henderson static const GVecGen2i ops[4] = { 42109a52d85SRichard Henderson { .fni8 = gen_srsra8_i64, 42209a52d85SRichard Henderson .fniv = gen_srsra_vec, 42309a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_b, 42409a52d85SRichard Henderson .opt_opc = vecop_list, 42509a52d85SRichard Henderson .load_dest = true, 42609a52d85SRichard Henderson .vece = MO_8 }, 42709a52d85SRichard Henderson { .fni8 = gen_srsra16_i64, 42809a52d85SRichard Henderson .fniv = gen_srsra_vec, 42909a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_h, 43009a52d85SRichard Henderson .opt_opc = vecop_list, 43109a52d85SRichard Henderson .load_dest = true, 43209a52d85SRichard Henderson .vece = MO_16 }, 43309a52d85SRichard Henderson { .fni4 = gen_srsra32_i32, 43409a52d85SRichard Henderson .fniv = gen_srsra_vec, 43509a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_s, 43609a52d85SRichard Henderson .opt_opc = vecop_list, 43709a52d85SRichard Henderson .load_dest = true, 43809a52d85SRichard Henderson .vece = MO_32 }, 43909a52d85SRichard Henderson { .fni8 = gen_srsra64_i64, 44009a52d85SRichard Henderson .fniv = gen_srsra_vec, 44109a52d85SRichard Henderson .fno = gen_helper_gvec_srsra_d, 44209a52d85SRichard Henderson 
.prefer_i64 = TCG_TARGET_REG_BITS == 64, 44309a52d85SRichard Henderson .opt_opc = vecop_list, 44409a52d85SRichard Henderson .load_dest = true, 44509a52d85SRichard Henderson .vece = MO_64 }, 44609a52d85SRichard Henderson }; 44709a52d85SRichard Henderson 44809a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 44909a52d85SRichard Henderson tcg_debug_assert(shift > 0); 45009a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 45109a52d85SRichard Henderson 45209a52d85SRichard Henderson /* 45309a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 45409a52d85SRichard Henderson * Signed results in all sign bits. With rounding, this produces 45509a52d85SRichard Henderson * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. 45609a52d85SRichard Henderson * I.e. always zero. With accumulation, this leaves D unchanged. 45709a52d85SRichard Henderson */ 45809a52d85SRichard Henderson if (shift == (8 << vece)) { 45909a52d85SRichard Henderson /* Nop, but we do need to clear the tail. 
*/ 46009a52d85SRichard Henderson tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); 46109a52d85SRichard Henderson } else { 46209a52d85SRichard Henderson tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); 46309a52d85SRichard Henderson } 46409a52d85SRichard Henderson } 46509a52d85SRichard Henderson 46609a52d85SRichard Henderson static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 46709a52d85SRichard Henderson { 46809a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 46909a52d85SRichard Henderson 47009a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 47109a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); 47209a52d85SRichard Henderson tcg_gen_vec_shr8i_i64(d, a, sh); 47309a52d85SRichard Henderson tcg_gen_vec_add8_i64(d, d, t); 47409a52d85SRichard Henderson } 47509a52d85SRichard Henderson 47609a52d85SRichard Henderson static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 47709a52d85SRichard Henderson { 47809a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 47909a52d85SRichard Henderson 48009a52d85SRichard Henderson tcg_gen_shri_i64(t, a, sh - 1); 48109a52d85SRichard Henderson tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); 48209a52d85SRichard Henderson tcg_gen_vec_shr16i_i64(d, a, sh); 48309a52d85SRichard Henderson tcg_gen_vec_add16_i64(d, d, t); 48409a52d85SRichard Henderson } 48509a52d85SRichard Henderson 48609a52d85SRichard Henderson void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) 48709a52d85SRichard Henderson { 48809a52d85SRichard Henderson TCGv_i32 t; 48909a52d85SRichard Henderson 49009a52d85SRichard Henderson /* Handle shift by the input size for the benefit of trans_URSHR_ri */ 49109a52d85SRichard Henderson if (sh == 32) { 49209a52d85SRichard Henderson tcg_gen_extract_i32(d, a, sh - 1, 1); 49309a52d85SRichard Henderson return; 49409a52d85SRichard Henderson } 49509a52d85SRichard Henderson t = tcg_temp_new_i32(); 49609a52d85SRichard Henderson tcg_gen_extract_i32(t, a, sh - 1, 
1); 49709a52d85SRichard Henderson tcg_gen_shri_i32(d, a, sh); 49809a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 49909a52d85SRichard Henderson } 50009a52d85SRichard Henderson 50109a52d85SRichard Henderson void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) 50209a52d85SRichard Henderson { 50309a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 50409a52d85SRichard Henderson 50509a52d85SRichard Henderson tcg_gen_extract_i64(t, a, sh - 1, 1); 50609a52d85SRichard Henderson tcg_gen_shri_i64(d, a, sh); 50709a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 50809a52d85SRichard Henderson } 50909a52d85SRichard Henderson 51009a52d85SRichard Henderson static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift) 51109a52d85SRichard Henderson { 51209a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 513143e179cSRichard Henderson TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1); 51409a52d85SRichard Henderson 51509a52d85SRichard Henderson tcg_gen_shri_vec(vece, t, a, shift - 1); 51609a52d85SRichard Henderson tcg_gen_and_vec(vece, t, t, ones); 51709a52d85SRichard Henderson tcg_gen_shri_vec(vece, d, a, shift); 51809a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 51909a52d85SRichard Henderson } 52009a52d85SRichard Henderson 52109a52d85SRichard Henderson void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 52209a52d85SRichard Henderson int64_t shift, uint32_t opr_sz, uint32_t max_sz) 52309a52d85SRichard Henderson { 52409a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 52509a52d85SRichard Henderson INDEX_op_shri_vec, INDEX_op_add_vec, 0 52609a52d85SRichard Henderson }; 52709a52d85SRichard Henderson static const GVecGen2i ops[4] = { 52809a52d85SRichard Henderson { .fni8 = gen_urshr8_i64, 52909a52d85SRichard Henderson .fniv = gen_urshr_vec, 53009a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_b, 53109a52d85SRichard Henderson .opt_opc = vecop_list, 53209a52d85SRichard Henderson .vece 
= MO_8 }, 53309a52d85SRichard Henderson { .fni8 = gen_urshr16_i64, 53409a52d85SRichard Henderson .fniv = gen_urshr_vec, 53509a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_h, 53609a52d85SRichard Henderson .opt_opc = vecop_list, 53709a52d85SRichard Henderson .vece = MO_16 }, 53809a52d85SRichard Henderson { .fni4 = gen_urshr32_i32, 53909a52d85SRichard Henderson .fniv = gen_urshr_vec, 54009a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_s, 54109a52d85SRichard Henderson .opt_opc = vecop_list, 54209a52d85SRichard Henderson .vece = MO_32 }, 54309a52d85SRichard Henderson { .fni8 = gen_urshr64_i64, 54409a52d85SRichard Henderson .fniv = gen_urshr_vec, 54509a52d85SRichard Henderson .fno = gen_helper_gvec_urshr_d, 54609a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 54709a52d85SRichard Henderson .opt_opc = vecop_list, 54809a52d85SRichard Henderson .vece = MO_64 }, 54909a52d85SRichard Henderson }; 55009a52d85SRichard Henderson 55109a52d85SRichard Henderson /* tszimm encoding produces immediates in the range [1..esize] */ 55209a52d85SRichard Henderson tcg_debug_assert(shift > 0); 55309a52d85SRichard Henderson tcg_debug_assert(shift <= (8 << vece)); 55409a52d85SRichard Henderson 55509a52d85SRichard Henderson if (shift == (8 << vece)) { 55609a52d85SRichard Henderson /* 55709a52d85SRichard Henderson * Shifts larger than the element size are architecturally valid. 55809a52d85SRichard Henderson * Unsigned results in zero. With rounding, this produces a 55909a52d85SRichard Henderson * copy of the most significant bit. 
         */
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

/*
 * URSRA: unsigned rounding shift right and accumulate.
 * Each helper computes the rounded right shift of 'a' by 'sh' into a
 * temporary and adds it into the destination: d += URSHR(a, sh).
 */

static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 8) {
        /* Rounding shift by esize: only the rounding carry, which is
         * bit (esize - 1) of the input, can survive the shift. */
        tcg_gen_vec_shr8i_i64(t, a, 7);
    } else {
        gen_urshr8_i64(t, a, sh);
    }
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 16) {
        /* sh == esize: only the rounding carry survives, as above. */
        tcg_gen_vec_shr16i_i64(t, a, 15);
    } else {
        gen_urshr16_i64(t, a, sh);
    }
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    if (sh == 32) {
        tcg_gen_shri_i32(t, a, 31);
    } else {
        gen_urshr32_i32(t, a, sh);
    }
    tcg_gen_add_i32(d, d, t);
}

static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 64) {
        tcg_gen_shri_i64(t, a, 63);
    } else {
        gen_urshr64_i64(t, a, sh);
    }
    tcg_gen_add_i64(d, d, t);
}

/* Vector-op variant; the element size is (8 << vece) bits. */
static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    if (sh == (8 << vece)) {
        tcg_gen_shri_vec(vece, t, a, sh - 1);
    } else {
        gen_urshr_vec(vece, t, a, sh);
    }
    tcg_gen_add_vec(vece, d, d, t);
}

/*
 * Expand URSRA over a full vector: for each element,
 * rd += unsigned rounding shift right of rm by 'shift'.
 * rd_ofs/rm_ofs are gvec register offsets; opr_sz is the number of
 * bytes operated on, max_sz the full register size.
 */
void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ursra8_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_ursra16_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_ursra32_i32,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_ursra64_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

/*
 * SRI: shift right and insert.  Per element, the low (esize - shift)
 * bits of 'd' are replaced by 'a' >> shift; the top 'shift' bits of
 * 'd' are preserved.
 */
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    /* Per-lane mask of the bits that receive the shifted-in value. */
    uint64_t mask = dup_const(MO_8, 0xff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

/* Note: uses 'a' as scratch for the shifted value (clobbers it). */
static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}

static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}

static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    /* Per-element mask of the preserved top 'sh' bits of d. */
    int64_t mi = MAKE_64BIT_MASK((8 << vece) - sh, sh);
    TCGv_vec m = tcg_constant_vec_matching(d, vece, mi);

    tcg_gen_shri_vec(vece, t, a, sh);
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

/*
 * Expand SRI (shift right and insert) over a full vector: for each
 * element, insert rm >> shift into the low bits of rd, preserving the
 * top 'shift' bits of rd.
 */
void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shr8_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shr16_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shr32_ins_i32,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shr64_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /* Shift of esize leaves destination unchanged. */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

/*
 * SLI: shift left and insert.  Per element, the top (esize - shift)
 * bits of 'd' are replaced by 'a' << shift; the low 'shift' bits of
 * 'd' are preserved.
 */
static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    /* Per-lane mask of the bits that receive the shifted-in value. */
    uint64_t mask = dup_const(MO_8, 0xff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
}

static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
}

/* SLI per element: insert a << sh into d, keeping the low sh bits of d. */
static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    /* Per-element mask of the preserved low 'sh' bits of d. */
    TCGv_vec m = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, sh));

    tcg_gen_shli_vec(vece, t, a, sh);
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

/*
 * Expand SLI (shift left and insert) over a full vector: for each
 * element, insert rm << shift into the high bits of rd, preserving
 * the low 'shift' bits of rd.
 */
void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shl8_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shl16_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shl32_ins_i32,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shl64_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [0..esize-1]. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift < (8 << vece));

    if (shift == 0) {
        /* Insert of the whole element is a plain copy of rm into rd. */
        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

/*
 * MLA/MLS: multiply-accumulate / multiply-subtract, d +/-= a * b.
 * These helpers use 'a' as scratch for the product (clobbering it).
 */
static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_add_u8(d, d, a);
}

static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_sub_u8(d, d, a);
}

/*
 * MLA/MLS per-size helpers: d +/-= a * b.
 * All of them use 'a' as scratch for the product (clobbering it).
 */

static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_add_u16(d, d, a);
}

static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_sub_u16(d, d, a);
}

static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_add_i32(d, d, a);
}

static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_sub_i32(d, d, a);
}

static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_add_i64(d, d, a);
}

static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_sub_i64(d, d, a);
}

static void gen_mla_vec(unsigned vece, TCGv_vec d,
                        TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_add_vec(vece, d, d, a);
}

static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, d, d, a);
}

/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
 * these tables are shared with AArch64 which does support them.
 */

/* Expand MLA over a full vector: per element, rd += rn * rm. */
void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mla8_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mla16_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mla32_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mla64_i64,
          .fniv = gen_mla_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* Expand MLS over a full vector: per element, rd -= rn * rm. */
void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mls8_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mls16_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mls32_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mls64_i64,
          .fniv = gen_mls_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* CMTST : test is "if (X & Y != 0)". */
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    /* d = -((a & b) != 0): all-ones when any common bit is set. */
    tcg_gen_negsetcond_i32(TCG_COND_TSTNE, d, a, b);
}

void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_negsetcond_i64(TCG_COND_TSTNE, d, a, b);
}

static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    /* Vector compare already produces -1/0 per element for TSTNE. */
    tcg_gen_cmp_vec(TCG_COND_TSTNE, vece, d, a, b);
}

/* Expand CMTST over a full vector: rd = (rn & rm) != 0 ? -1 : 0. */
void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_helper_neon_tst_u8,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_helper_neon_tst_u16,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_cmtst_i32,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_cmtst_i64,
          .fniv = gen_cmtst_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/*
 * USHL: variable shift of an unsigned operand.  The shift count is
 * the signed low byte of 'shift': non-negative counts shift left,
 * negative counts shift right; counts >= the operand width yield zero.
 */
void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();  /* left-shift candidate */
    TCGv_i32 rval = tcg_temp_new_i32();  /* right-shift candidate */
    TCGv_i32 lsh = tcg_temp_new_i32();   /* sign-extended shift count */
    TCGv_i32 rsh = tcg_temp_new_i32();   /* negated count, for shr */
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(32);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_shr_i32(rval, src, rsh);
    /* Keep the left shift only for 0 <= lsh < 32 (else zero) ... */
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
    /* ... then overwrite with the right shift for 0 <= rsh < 32. */
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

/* As gen_ushl_i32 above, for a 64-bit operand. */
void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(64);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_shr_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

/*
 * USHL on vector elements: the shift count is the signed low byte of
 * each element of 'shift'; positive shifts left, negative shifts
 * right, and out-of-range counts produce zero.
 */
static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec max, zero;

    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        /* Only the low byte of each element is a valid shift count. */
        TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_shrv_vec(vece, rval, src, rsh);

    /*
     * The choice of GE (signed) and GEU (unsigned) are biased toward
     * the instructions of the x86_64 host.  For MO_8, the whole byte
     * is significant so we must use an unsigned compare; otherwise we
     * have already masked to a byte and so a signed compare works.
     * Other tcg hosts have a full set of comparisons and do not care.
     */
    zero = tcg_constant_vec_matching(dst, vece, 0);
    max = tcg_constant_vec_matching(dst, vece, 8 << vece);
    if (vece == MO_8) {
        tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, lval, lsh, max, zero, lval);
        tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, rval, rsh, max, zero, rval);
    } else {
        tcg_gen_cmpsel_vec(TCG_COND_GE, vece, lval, lsh, max, zero, lval);
        tcg_gen_cmpsel_vec(TCG_COND_GE, vece, rval, rsh, max, zero, rval);
    }
    tcg_gen_or_vec(vece, dst, lval, rval);
}

/* Expand USHL over a full vector: rd = rn shifted by the count in rm. */
void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_shlv_vec,
        INDEX_op_shrv_vec, INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ushl_i32,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ushl_i64,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/*
 * SSHL: variable shift of a signed operand.  The shift count is the
 * signed low byte of 'shift': non-negative counts shift left,
 * negative counts arithmetic-shift right.  A left shift >= width
 * yields zero; a right shift >= width yields the sign (0 or -1).
 */
void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();  /* left-shift candidate */
    TCGv_i32 rval = tcg_temp_new_i32();  /* right-shift candidate */
    TCGv_i32 lsh = tcg_temp_new_i32();   /* sign-extended shift count */
    TCGv_i32 rsh = tcg_temp_new_i32();   /* negated count, for sar */
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(31);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    /* Clamp the right-shift count so count >= 32 behaves as 31. */
    tcg_gen_umin_i32(rsh, rsh, max);
    tcg_gen_sar_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

/* As gen_sshl_i32 above, for a 64-bit operand. */
void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(63);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
116509a52d85SRichard Henderson */ 116609a52d85SRichard Henderson tcg_gen_ext8s_i64(lsh, shift); 116709a52d85SRichard Henderson tcg_gen_neg_i64(rsh, lsh); 116809a52d85SRichard Henderson tcg_gen_shl_i64(lval, src, lsh); 116909a52d85SRichard Henderson tcg_gen_umin_i64(rsh, rsh, max); 117009a52d85SRichard Henderson tcg_gen_sar_i64(rval, src, rsh); 117109a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero); 117209a52d85SRichard Henderson tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval); 117309a52d85SRichard Henderson } 117409a52d85SRichard Henderson 117509a52d85SRichard Henderson static void gen_sshl_vec(unsigned vece, TCGv_vec dst, 117609a52d85SRichard Henderson TCGv_vec src, TCGv_vec shift) 117709a52d85SRichard Henderson { 117809a52d85SRichard Henderson TCGv_vec lval = tcg_temp_new_vec_matching(dst); 117909a52d85SRichard Henderson TCGv_vec rval = tcg_temp_new_vec_matching(dst); 118009a52d85SRichard Henderson TCGv_vec lsh = tcg_temp_new_vec_matching(dst); 118109a52d85SRichard Henderson TCGv_vec rsh = tcg_temp_new_vec_matching(dst); 1182143e179cSRichard Henderson TCGv_vec max, zero; 118309a52d85SRichard Henderson 118409a52d85SRichard Henderson /* 118509a52d85SRichard Henderson * Rely on the TCG guarantee that out of range shifts produce 118609a52d85SRichard Henderson * unspecified results, not undefined behaviour (i.e. no trap). 118709a52d85SRichard Henderson * Discard out-of-range results after the fact. 
118809a52d85SRichard Henderson */ 118909a52d85SRichard Henderson tcg_gen_neg_vec(vece, rsh, shift); 119009a52d85SRichard Henderson if (vece == MO_8) { 119109a52d85SRichard Henderson tcg_gen_mov_vec(lsh, shift); 119209a52d85SRichard Henderson } else { 1193143e179cSRichard Henderson TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff); 1194143e179cSRichard Henderson tcg_gen_and_vec(vece, lsh, shift, msk); 1195143e179cSRichard Henderson tcg_gen_and_vec(vece, rsh, rsh, msk); 119609a52d85SRichard Henderson } 119709a52d85SRichard Henderson 119809a52d85SRichard Henderson /* Bound rsh so out of bound right shift gets -1. */ 1199143e179cSRichard Henderson max = tcg_constant_vec_matching(dst, vece, (8 << vece) - 1); 1200143e179cSRichard Henderson tcg_gen_umin_vec(vece, rsh, rsh, max); 120109a52d85SRichard Henderson 120209a52d85SRichard Henderson tcg_gen_shlv_vec(vece, lval, src, lsh); 120309a52d85SRichard Henderson tcg_gen_sarv_vec(vece, rval, src, rsh); 120409a52d85SRichard Henderson 120509a52d85SRichard Henderson /* Select in-bound left shift. */ 1206ee36a772SRichard Henderson zero = tcg_constant_vec_matching(dst, vece, 0); 1207ee36a772SRichard Henderson tcg_gen_cmpsel_vec(TCG_COND_GT, vece, lval, lsh, max, zero, lval); 120809a52d85SRichard Henderson 120909a52d85SRichard Henderson /* Select between left and right shift. 
*/ 121009a52d85SRichard Henderson if (vece == MO_8) { 1211143e179cSRichard Henderson tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, zero, rval, lval); 121209a52d85SRichard Henderson } else { 1213143e179cSRichard Henderson TCGv_vec sgn = tcg_constant_vec_matching(dst, vece, 0x80); 1214143e179cSRichard Henderson tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, sgn, lval, rval); 121509a52d85SRichard Henderson } 121609a52d85SRichard Henderson } 121709a52d85SRichard Henderson 121809a52d85SRichard Henderson void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 121909a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 122009a52d85SRichard Henderson { 122109a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 122209a52d85SRichard Henderson INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, 1223ee36a772SRichard Henderson INDEX_op_sarv_vec, INDEX_op_cmpsel_vec, 0 122409a52d85SRichard Henderson }; 122509a52d85SRichard Henderson static const GVecGen3 ops[4] = { 122609a52d85SRichard Henderson { .fniv = gen_sshl_vec, 122709a52d85SRichard Henderson .fno = gen_helper_gvec_sshl_b, 122809a52d85SRichard Henderson .opt_opc = vecop_list, 122909a52d85SRichard Henderson .vece = MO_8 }, 123009a52d85SRichard Henderson { .fniv = gen_sshl_vec, 123109a52d85SRichard Henderson .fno = gen_helper_gvec_sshl_h, 123209a52d85SRichard Henderson .opt_opc = vecop_list, 123309a52d85SRichard Henderson .vece = MO_16 }, 123409a52d85SRichard Henderson { .fni4 = gen_sshl_i32, 123509a52d85SRichard Henderson .fniv = gen_sshl_vec, 123609a52d85SRichard Henderson .opt_opc = vecop_list, 123709a52d85SRichard Henderson .vece = MO_32 }, 123809a52d85SRichard Henderson { .fni8 = gen_sshl_i64, 123909a52d85SRichard Henderson .fniv = gen_sshl_vec, 124009a52d85SRichard Henderson .opt_opc = vecop_list, 124109a52d85SRichard Henderson .vece = MO_64 }, 124209a52d85SRichard Henderson }; 124309a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 
                   &ops[vece]);
}

/* Expand SRSHL (signed rounding shift by variable) via helpers only. */
void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    /* One out-of-line helper per element size, indexed by vece. */
    static gen_helper_gvec_3 * const fns[] = {
        gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h,
        gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

/* Expand URSHL (unsigned rounding shift by variable) via helpers only. */
void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[] = {
        gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h,
        gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

/*
 * Expand Neon SQSHL via helpers; tcg_env is passed so the helpers
 * have access to CPU state (saturation is involved — note the "q").
 */
void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h,
        gen_helper_neon_sqshl_s, gen_helper_neon_sqshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

/* Expand Neon UQSHL via helpers, with tcg_env for CPU state access. */
void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h,
        gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

/* Expand Neon SQRSHL via helpers, with tcg_env for CPU state access. */
void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_sqrshl_b, gen_helper_neon_sqrshl_h,
        gen_helper_neon_sqrshl_s, gen_helper_neon_sqrshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

/* Expand Neon UQRSHL via helpers, with tcg_env for CPU state access. */
void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_uqrshl_b, gen_helper_neon_uqrshl_h,
        gen_helper_neon_uqrshl_s, gen_helper_neon_uqrshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

/*
 * Unsigned saturating add for an (8 << esz)-bit element held in an i64.
 * The raw and clamped sums are xor'd and or'd into @qc, so @qc becomes
 * non-zero iff saturation occurred.
 */
void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    /* Clamp to the element's maximum value. */
    tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

/* 64-bit unsigned saturating add; @qc accumulates the saturation flag. */
void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_add_i64(t, a, b);
    /* Unsigned overflow iff the sum is less than one of the addends. */
    tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a,
                        tcg_constant_i64(UINT64_MAX), t);
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}

/* Vector UQADD: saturating add, accumulating raw-vs-saturated diff in @qc. */
static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_usadd_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

/* Expand UQADD with QC flag update (second gvec operand is vfp.qc). */
void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_usadd_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_b,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_h,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_s,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fniv = gen_uqadd_vec,
          .fni8 = gen_uqadd_d,
          .fno = gen_helper_gvec_uqadd_d,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/*
 * Signed saturating add for an (8 << esz)-bit element held in an i64;
 * @qc accumulates a non-zero value iff the result saturated.
 */
void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    int64_t min = -1ll - max;
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    /* Clamp into [min, max] for the element width. */
    tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

/* 64-bit signed saturating add; @qc accumulates the saturation flag. */
void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_add_i64(t0, a, b);

    /* Compute signed overflow indication into T1 */
    /* Overflow iff a and b have the same sign and the sum's sign differs. */
    tcg_gen_xor_i64(t1, a, b);
    tcg_gen_xor_i64(t2, t0, a);
    tcg_gen_andc_i64(t1, t2, t1);

    /* Compute saturated value into T2 */
    /* INT64_MAX if a >= 0, INT64_MIN if a < 0. */
    tcg_gen_sari_i64(t2, a, 63);
    tcg_gen_xori_i64(t2, t2, INT64_MAX);

    tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
    tcg_gen_xor_i64(t0, t0, res);
    tcg_gen_or_i64(qc, qc, t0);
}

/* Vector SQADD: saturating add, accumulating raw-vs-saturated diff in @qc. */
static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_ssadd_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

/* Expand SQADD with QC flag update (second gvec operand is vfp.qc). */
void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ssadd_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqadd_vec,
          .fni8 = gen_sqadd_d,
          .fno = gen_helper_gvec_sqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/*
 * Unsigned saturating subtract for an (8 << esz)-bit element held in
 * an i64; @qc accumulates a non-zero value iff the result saturated.
 */
void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_sub_i64(tmp, a, b);
    /* Clamp a negative difference to zero. */
    tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

/* 64-bit unsigned saturating subtract; @qc accumulates the saturation flag. */
void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    /* Underflow iff a < b; saturate to zero in that case. */
    tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), t);
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}

/* Vector UQSUB: saturating sub, accumulating raw-vs-saturated diff in @qc. */
static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_ussub_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

/* Expand UQSUB with QC flag update (second gvec operand is vfp.qc). */
void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ussub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_uqsub_vec,
          .fni8 = gen_uqsub_d,
          .fno = gen_helper_gvec_uqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/*
 * Signed saturating subtract for an (8 << esz)-bit element held in
 * an i64; @qc accumulates a non-zero value iff the result saturated.
 */
void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    int64_t min = -1ll - max;
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_sub_i64(tmp, a, b);
    /* Clamp into [min, max] for the element width. */
    tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

/* 64-bit signed saturating subtract; @qc accumulates the saturation flag. */
void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_sub_i64(t0, a, b);

    /* Compute signed overflow indication into T1 */
    /* Overflow iff a and b differ in sign and the result's sign differs from a. */
    tcg_gen_xor_i64(t1, a, b);
    tcg_gen_xor_i64(t2, t0, a);
    tcg_gen_and_i64(t1, t1, t2);

    /* Compute saturated value into T2 */
    /* INT64_MAX if a >= 0, INT64_MIN if a < 0. */
    tcg_gen_sari_i64(t2, a, 63);
    tcg_gen_xori_i64(t2, t2, INT64_MAX);

    tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
    tcg_gen_xor_i64(t0, t0, res);
    tcg_gen_or_i64(qc, qc, t0);
}

/* Vector SQSUB: saturating sub, accumulating raw-vs-saturated diff in @qc. */
static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_sssub_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

/* Expand SQSUB with QC flag update (second gvec operand is vfp.qc). */
void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sssub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_b,
          .opt_opc =
vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqsub_vec,
          .fni8 = gen_sqsub_d,
          .fno = gen_helper_gvec_sqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* Signed absolute difference: d = |a - b|, computed without branches. */
static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    /* Select a - b when a >= b (signed), else b - a. */
    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
}

/* 64-bit signed absolute difference: d = |a - b|. */
static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
}

/* Vector signed absolute difference: smax(a, b) - smin(a, b). */
static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_smin_vec(vece, t, a, b);
    tcg_gen_smax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

/* Expand SABD: vector ops when available, scalar/helper fallbacks. */
void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sabd_i32,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sabd_i64,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* Unsigned absolute difference: d = |a - b| with unsigned compare. */
static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
}

/* 64-bit unsigned absolute difference. */
static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
}

/* Vector unsigned absolute difference: umax(a, b) - umin(a, b). */
static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_umin_vec(vece, t, a, b);
    tcg_gen_umax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

/* Expand UABD: vector ops when available, scalar/helper fallbacks. */
void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uabd_i32,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_uabd_i64,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* Signed absolute difference and accumulate: d += |a - b|. */
static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_sabd_i32(t, a, b);
Henderson tcg_gen_add_i32(d, d, t); 172709a52d85SRichard Henderson } 172809a52d85SRichard Henderson 172909a52d85SRichard Henderson static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 173009a52d85SRichard Henderson { 173109a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 173209a52d85SRichard Henderson gen_sabd_i64(t, a, b); 173309a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 173409a52d85SRichard Henderson } 173509a52d85SRichard Henderson 173609a52d85SRichard Henderson static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 173709a52d85SRichard Henderson { 173809a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 173909a52d85SRichard Henderson gen_sabd_vec(vece, t, a, b); 174009a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 174109a52d85SRichard Henderson } 174209a52d85SRichard Henderson 174309a52d85SRichard Henderson void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 174409a52d85SRichard Henderson uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 174509a52d85SRichard Henderson { 174609a52d85SRichard Henderson static const TCGOpcode vecop_list[] = { 174709a52d85SRichard Henderson INDEX_op_sub_vec, INDEX_op_add_vec, 174809a52d85SRichard Henderson INDEX_op_smin_vec, INDEX_op_smax_vec, 0 174909a52d85SRichard Henderson }; 175009a52d85SRichard Henderson static const GVecGen3 ops[4] = { 175109a52d85SRichard Henderson { .fniv = gen_saba_vec, 175209a52d85SRichard Henderson .fno = gen_helper_gvec_saba_b, 175309a52d85SRichard Henderson .opt_opc = vecop_list, 175409a52d85SRichard Henderson .load_dest = true, 175509a52d85SRichard Henderson .vece = MO_8 }, 175609a52d85SRichard Henderson { .fniv = gen_saba_vec, 175709a52d85SRichard Henderson .fno = gen_helper_gvec_saba_h, 175809a52d85SRichard Henderson .opt_opc = vecop_list, 175909a52d85SRichard Henderson .load_dest = true, 176009a52d85SRichard Henderson .vece = MO_16 }, 176109a52d85SRichard Henderson { .fni4 = gen_saba_i32, 176209a52d85SRichard 
Henderson .fniv = gen_saba_vec, 176309a52d85SRichard Henderson .fno = gen_helper_gvec_saba_s, 176409a52d85SRichard Henderson .opt_opc = vecop_list, 176509a52d85SRichard Henderson .load_dest = true, 176609a52d85SRichard Henderson .vece = MO_32 }, 176709a52d85SRichard Henderson { .fni8 = gen_saba_i64, 176809a52d85SRichard Henderson .fniv = gen_saba_vec, 176909a52d85SRichard Henderson .fno = gen_helper_gvec_saba_d, 177009a52d85SRichard Henderson .prefer_i64 = TCG_TARGET_REG_BITS == 64, 177109a52d85SRichard Henderson .opt_opc = vecop_list, 177209a52d85SRichard Henderson .load_dest = true, 177309a52d85SRichard Henderson .vece = MO_64 }, 177409a52d85SRichard Henderson }; 177509a52d85SRichard Henderson tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); 177609a52d85SRichard Henderson } 177709a52d85SRichard Henderson 177809a52d85SRichard Henderson static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) 177909a52d85SRichard Henderson { 178009a52d85SRichard Henderson TCGv_i32 t = tcg_temp_new_i32(); 178109a52d85SRichard Henderson gen_uabd_i32(t, a, b); 178209a52d85SRichard Henderson tcg_gen_add_i32(d, d, t); 178309a52d85SRichard Henderson } 178409a52d85SRichard Henderson 178509a52d85SRichard Henderson static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 178609a52d85SRichard Henderson { 178709a52d85SRichard Henderson TCGv_i64 t = tcg_temp_new_i64(); 178809a52d85SRichard Henderson gen_uabd_i64(t, a, b); 178909a52d85SRichard Henderson tcg_gen_add_i64(d, d, t); 179009a52d85SRichard Henderson } 179109a52d85SRichard Henderson 179209a52d85SRichard Henderson static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) 179309a52d85SRichard Henderson { 179409a52d85SRichard Henderson TCGv_vec t = tcg_temp_new_vec_matching(d); 179509a52d85SRichard Henderson gen_uabd_vec(vece, t, a, b); 179609a52d85SRichard Henderson tcg_gen_add_vec(vece, d, d, t); 179709a52d85SRichard Henderson } 179809a52d85SRichard Henderson 179909a52d85SRichard Henderson 
/*
 * Expand UABA (unsigned absolute difference and accumulate).
 * load_dest is set because the destination is both read and written.
 */
void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_uaba_i32,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_uaba_i64,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* Expand ADDP (add pairwise), out-of-line helpers for all element sizes. */
void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_addp_b,
        gen_helper_gvec_addp_h,
        gen_helper_gvec_addp_s,
        gen_helper_gvec_addp_d,
    };
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

/* Expand SMAXP (signed maximum pairwise); no 64-bit element form. */
void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_smaxp_b,
        gen_helper_gvec_smaxp_h,
        gen_helper_gvec_smaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

/* Expand SMINP (signed minimum pairwise); no 64-bit element form. */
void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_sminp_b,
        gen_helper_gvec_sminp_h,
        gen_helper_gvec_sminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

/* Expand UMAXP (unsigned maximum pairwise); no 64-bit element form. */
void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_umaxp_b,
        gen_helper_gvec_umaxp_h,
        gen_helper_gvec_umaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

/* Expand UMINP (unsigned minimum pairwise); no 64-bit element form. */
void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_uminp_b,
        gen_helper_gvec_uminp_h,
        gen_helper_gvec_uminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

/*
 * Signed halving add, 8-bit lanes in a 64-bit register:
 * d = (a + b) >> 1 per lane, computed without intermediate overflow as
 * (a >> 1) + (b >> 1) + (a & b & 1).
 */
static void gen_shadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_and_i64(t, a, b);
    tcg_gen_vec_sar8i_i64(a, a, 1);
    tcg_gen_vec_sar8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, t);
}

/* Signed halving add, 16-bit lanes in a 64-bit register. */
static void gen_shadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_and_i64(t, a, b);
    tcg_gen_vec_sar16i_i64(a, a, 1);
    tcg_gen_vec_sar16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, t);
}

/* Signed halving add, 32-bit scalar. */
static void gen_shadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_and_i32(t, a, b);
    tcg_gen_sari_i32(a, a, 1);
    tcg_gen_sari_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, t);
}

/* Signed halving add, vector expansion. */
static void gen_shadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_and_vec(vece, t, a, b);
    tcg_gen_sari_vec(vece, a, a, 1);
    tcg_gen_sari_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

/* Expand SHADD (signed halving add); no 64-bit element form. */
void gen_gvec_shadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_shadd8_i64,
          .fniv = gen_shadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shadd16_i64,
          .fniv = gen_shadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shadd_i32,
          .fniv = gen_shadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

/*
 * Unsigned halving add, 8-bit lanes in a 64-bit register:
 * identical to the signed form except the per-lane shifts are logical.
 */
static void gen_uhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_and_i64(t, a, b);
    tcg_gen_vec_shr8i_i64(a, a, 1);
    tcg_gen_vec_shr8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, t);
}

/* Unsigned halving add, 16-bit lanes in a 64-bit register. */
static void gen_uhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_and_i64(t, a, b);
    tcg_gen_vec_shr16i_i64(a, a, 1);
    tcg_gen_vec_shr16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, t);
}

/* Unsigned halving add, 32-bit scalar. */
static void gen_uhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_and_i32(t, a, b);
    tcg_gen_shri_i32(a, a, 1);
    tcg_gen_shri_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, t);
}

/* Unsigned halving add, vector expansion. */
static void gen_uhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_and_vec(vece, t, a, b);
    tcg_gen_shri_vec(vece, a, a, 1);
    tcg_gen_shri_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

/* Expand UHADD (unsigned halving add); no 64-bit element form. */
void gen_gvec_uhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_uhadd8_i64,
          .fniv = gen_uhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_uhadd16_i64,
          .fniv = gen_uhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uhadd_i32,
          .fniv = gen_uhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

/*
 * Signed halving subtract, 8-bit lanes in a 64-bit register:
 * d = (a - b) >> 1 per lane, computed without intermediate overflow as
 * (a >> 1) - (b >> 1) - (~a & b & 1), where (~a & b & 1) is the borrow
 * out of the low bit.
 */
static void gen_shsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andc_i64(t, b, a);
    tcg_gen_vec_sar8i_i64(a, a, 1);
    tcg_gen_vec_sar8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sub8_i64(d, a, b);
    tcg_gen_vec_sub8_i64(d, d, t);
}

/* Signed halving subtract, 16-bit lanes in a 64-bit register. */
static void gen_shsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andc_i64(t, b, a);
    tcg_gen_vec_sar16i_i64(a, a, 1);
    tcg_gen_vec_sar16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sub16_i64(d, a, b);
    tcg_gen_vec_sub16_i64(d, d, t);
}

/* Signed halving subtract, 32-bit scalar. */
static void gen_shsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_andc_i32(t, b, a);
    tcg_gen_sari_i32(a, a, 1);
    tcg_gen_sari_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_sub_i32(d, a, b);
    tcg_gen_sub_i32(d, d, t);
}

/* Signed halving subtract, vector expansion. */
static void gen_shsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_andc_vec(vece, t, b, a);
    tcg_gen_sari_vec(vece, a, a, 1);
    tcg_gen_sari_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_sub_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

/* Expand SHSUB (signed halving subtract); no 64-bit element form. */
void gen_gvec_shsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_shsub8_i64,
          .fniv = gen_shsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shsub16_i64,
          .fniv = gen_shsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shsub_i32,
          .fniv = gen_shsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    /* Use tcg_debug_assert, matching the other expanders in this file. */
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

/* Unsigned halving subtract, 8-bit lanes in a 64-bit register. */
static void gen_uhsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andc_i64(t, b, a);
    tcg_gen_vec_shr8i_i64(a, a, 1);
    tcg_gen_vec_shr8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sub8_i64(d, a, b);
    tcg_gen_vec_sub8_i64(d, d, t);
}

/* Unsigned halving subtract, 16-bit lanes in a 64-bit register. */
static void gen_uhsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_andc_i64(t, b, a);
    tcg_gen_vec_shr16i_i64(a, a, 1);
    tcg_gen_vec_shr16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sub16_i64(d, a, b);
    tcg_gen_vec_sub16_i64(d, d, t);
}

/* Unsigned halving subtract, 32-bit scalar. */
static void gen_uhsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_andc_i32(t, b, a);
    tcg_gen_shri_i32(a, a, 1);
    tcg_gen_shri_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_sub_i32(d, a, b);
    tcg_gen_sub_i32(d, d, t);
}

/* Unsigned halving subtract, vector expansion. */
static void gen_uhsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_andc_vec(vece, t, b, a);
    tcg_gen_shri_vec(vece, a, a, 1);
    tcg_gen_shri_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_sub_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

/* Expand UHSUB (unsigned halving subtract); no 64-bit element form. */
void gen_gvec_uhsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_uhsub8_i64,
          .fniv = gen_uhsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_uhsub16_i64,
          .fniv = gen_uhsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uhsub_i32,
          .fniv = gen_uhsub_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    /* Use tcg_debug_assert, matching the other expanders in this file. */
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

/*
 * Signed rounding halving add, 8-bit lanes in a 64-bit register:
 * d = (a + b + 1) >> 1 per lane, computed without intermediate overflow as
 * (a >> 1) + (b >> 1) + ((a | b) & 1).
 */
static void gen_srhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_or_i64(t, a, b);
    tcg_gen_vec_sar8i_i64(a, a, 1);
    tcg_gen_vec_sar8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, t);
}

/* Signed rounding halving add, 16-bit lanes in a 64-bit register. */
static void gen_srhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_or_i64(t, a, b);
    tcg_gen_vec_sar16i_i64(a, a, 1);
    tcg_gen_vec_sar16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, t);
}

/* Signed rounding halving add, 32-bit scalar. */
static void gen_srhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_or_i32(t, a, b);
    tcg_gen_sari_i32(a, a, 1);
    tcg_gen_sari_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, t);
}

/* Signed rounding halving add, vector expansion. */
static void gen_srhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_or_vec(vece, t, a, b);
    tcg_gen_sari_vec(vece, a, a, 1);
    tcg_gen_sari_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

/* Expand SRHADD (signed rounding halving add); no 64-bit element form. */
void gen_gvec_srhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_srhadd8_i64,
          .fniv = gen_srhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_srhadd16_i64,
          .fniv = gen_srhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_srhadd_i32,
          .fniv = gen_srhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    /* Use tcg_debug_assert, matching the other expanders in this file. */
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}

/* Unsigned rounding halving add, 8-bit lanes in a 64-bit register. */
static void gen_urhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_or_i64(t, a, b);
    tcg_gen_vec_shr8i_i64(a, a, 1);
    tcg_gen_vec_shr8i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_add8_i64(d, a, b);
    tcg_gen_vec_add8_i64(d, d, t);
}

/* Unsigned rounding halving add, 16-bit lanes in a 64-bit register. */
static void gen_urhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_or_i64(t, a, b);
    tcg_gen_vec_shr16i_i64(a, a, 1);
    tcg_gen_vec_shr16i_i64(b, b, 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_add16_i64(d, a, b);
    tcg_gen_vec_add16_i64(d, d, t);
}

/* Unsigned rounding halving add, 32-bit scalar. */
static void gen_urhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_or_i32(t, a, b);
    tcg_gen_shri_i32(a, a, 1);
    tcg_gen_shri_i32(b, b, 1);
    tcg_gen_andi_i32(t, t, 1);
    tcg_gen_add_i32(d, a, b);
    tcg_gen_add_i32(d, d, t);
}

/* Unsigned rounding halving add, vector expansion. */
static void gen_urhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_or_vec(vece, t, a, b);
    tcg_gen_shri_vec(vece, a, a, 1);
    tcg_gen_shri_vec(vece, b, b, 1);
    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1));
    tcg_gen_add_vec(vece, d, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

/* Expand URHADD (unsigned rounding halving add); no 64-bit element form. */
void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 g[] = {
        { .fni8 = gen_urhadd8_i64,
          .fniv = gen_urhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_urhadd16_i64,
          .fniv = gen_urhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_urhadd_i32,
          .fniv = gen_urhadd_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
    };
    /* Use tcg_debug_assert, matching the other expanders in this file. */
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}