/*
 * AArch64 generic vector expansion
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"
#include "translate-a64.h"


/* RAX1, one 64-bit element: d = n ^ rol64(m, 1). */
static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    tcg_gen_rotli_i64(d, m, 1);
    tcg_gen_xor_i64(d, d, n);
}

/* As gen_rax1_i64, but emitting host vector ops. */
static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
{
    tcg_gen_rotli_vec(vece, d, m, 1);
    tcg_gen_xor_vec(vece, d, d, n);
}

/*
 * Expand RAX1 across a vector register: each 64-bit lane of rd
 * becomes rn ^ rol64(rm, 1).  Uses the inline i64/vec expansions
 * when the host supports rotli_vec, otherwise falls back to the
 * out-of-line helper.
 */
void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3 op = {
        .fni8 = gen_rax1_i64,
        .fniv = gen_rax1_vec,
        .opt_opc = vecop_list,
        .fno = gen_helper_crypto_rax1,
        .vece = MO_64,
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
}

/*
 * XAR on 8-bit lanes packed into one i64: t = n ^ m, then each byte
 * of t is rotated right by sh.  There is no per-lane rotate at i64
 * width, so the rotate is composed from a right shift, a left shift
 * by (lane width - sh), and complementary lane masks.
 */
static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    /* Per-byte mask of the bits a logical shift right by sh keeps. */
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
}

/* As gen_xar8_i64, for 16-bit lanes. */
static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    /* Per-halfword mask of the bits a logical shift right by sh keeps. */
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
}

/* XAR on one 32-bit element: d = ror32(n ^ m, sh). */
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

/* XAR on one 64-bit element: d = ror64(n ^ m, sh). */
static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

/* XAR on host vector types, any element size. */
static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}

/*
 * Expand XAR across a vector register: each lane of rd becomes
 * ror(rn ^ rm, shift) at the element size given by vece.  For 8-
 * and 16-bit lanes the i64 fallback must synthesize the per-lane
 * rotate (gen_xar8_i64/gen_xar16_i64); 32- and 64-bit lanes use the
 * native rotate.  A shift of 0 (or of a full element) reduces to a
 * plain XOR.
 */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    /* Fold the two encodings together: rotating by esize is a no-op. */
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}

/* EOR3, one 64-bit element: d = n ^ m ^ k. */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

/* As gen_eor3_i64, but emitting host vector ops. */
static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

/* Expand EOR3 across a vector register: d = n ^ m ^ a, per 64-bit lane. */
void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                   uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

/* BCAX, one 64-bit element: d = n ^ (m & ~k). */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

/* As gen_bcax_i64, but emitting host vector ops. */
static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

/* Expand BCAX across a vector register: d = n ^ (m & ~a), per 64-bit lane. */
void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                   uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}