xref: /qemu/target/arm/tcg/gengvec64.c (revision a11efe30b9fc33ecc38255019d7ed7c750ec27ba)
/*
 *  AArch64 generic vector expansion
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19*a11efe30SRichard Henderson 
20*a11efe30SRichard Henderson #include "qemu/osdep.h"
21*a11efe30SRichard Henderson #include "translate.h"
22*a11efe30SRichard Henderson #include "translate-a64.h"
23*a11efe30SRichard Henderson 
24*a11efe30SRichard Henderson 
25*a11efe30SRichard Henderson static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
26*a11efe30SRichard Henderson {
27*a11efe30SRichard Henderson     tcg_gen_rotli_i64(d, m, 1);
28*a11efe30SRichard Henderson     tcg_gen_xor_i64(d, d, n);
29*a11efe30SRichard Henderson }
30*a11efe30SRichard Henderson 
31*a11efe30SRichard Henderson static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
32*a11efe30SRichard Henderson {
33*a11efe30SRichard Henderson     tcg_gen_rotli_vec(vece, d, m, 1);
34*a11efe30SRichard Henderson     tcg_gen_xor_vec(vece, d, d, n);
35*a11efe30SRichard Henderson }
36*a11efe30SRichard Henderson 
37*a11efe30SRichard Henderson void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
38*a11efe30SRichard Henderson                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
39*a11efe30SRichard Henderson {
40*a11efe30SRichard Henderson     static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
41*a11efe30SRichard Henderson     static const GVecGen3 op = {
42*a11efe30SRichard Henderson         .fni8 = gen_rax1_i64,
43*a11efe30SRichard Henderson         .fniv = gen_rax1_vec,
44*a11efe30SRichard Henderson         .opt_opc = vecop_list,
45*a11efe30SRichard Henderson         .fno = gen_helper_crypto_rax1,
46*a11efe30SRichard Henderson         .vece = MO_64,
47*a11efe30SRichard Henderson     };
48*a11efe30SRichard Henderson     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
49*a11efe30SRichard Henderson }
50*a11efe30SRichard Henderson 
51*a11efe30SRichard Henderson static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
52*a11efe30SRichard Henderson {
53*a11efe30SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
54*a11efe30SRichard Henderson     uint64_t mask = dup_const(MO_8, 0xff >> sh);
55*a11efe30SRichard Henderson 
56*a11efe30SRichard Henderson     tcg_gen_xor_i64(t, n, m);
57*a11efe30SRichard Henderson     tcg_gen_shri_i64(d, t, sh);
58*a11efe30SRichard Henderson     tcg_gen_shli_i64(t, t, 8 - sh);
59*a11efe30SRichard Henderson     tcg_gen_andi_i64(d, d, mask);
60*a11efe30SRichard Henderson     tcg_gen_andi_i64(t, t, ~mask);
61*a11efe30SRichard Henderson     tcg_gen_or_i64(d, d, t);
62*a11efe30SRichard Henderson }
63*a11efe30SRichard Henderson 
64*a11efe30SRichard Henderson static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
65*a11efe30SRichard Henderson {
66*a11efe30SRichard Henderson     TCGv_i64 t = tcg_temp_new_i64();
67*a11efe30SRichard Henderson     uint64_t mask = dup_const(MO_16, 0xffff >> sh);
68*a11efe30SRichard Henderson 
69*a11efe30SRichard Henderson     tcg_gen_xor_i64(t, n, m);
70*a11efe30SRichard Henderson     tcg_gen_shri_i64(d, t, sh);
71*a11efe30SRichard Henderson     tcg_gen_shli_i64(t, t, 16 - sh);
72*a11efe30SRichard Henderson     tcg_gen_andi_i64(d, d, mask);
73*a11efe30SRichard Henderson     tcg_gen_andi_i64(t, t, ~mask);
74*a11efe30SRichard Henderson     tcg_gen_or_i64(d, d, t);
75*a11efe30SRichard Henderson }
76*a11efe30SRichard Henderson 
77*a11efe30SRichard Henderson static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
78*a11efe30SRichard Henderson {
79*a11efe30SRichard Henderson     tcg_gen_xor_i32(d, n, m);
80*a11efe30SRichard Henderson     tcg_gen_rotri_i32(d, d, sh);
81*a11efe30SRichard Henderson }
82*a11efe30SRichard Henderson 
83*a11efe30SRichard Henderson static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
84*a11efe30SRichard Henderson {
85*a11efe30SRichard Henderson     tcg_gen_xor_i64(d, n, m);
86*a11efe30SRichard Henderson     tcg_gen_rotri_i64(d, d, sh);
87*a11efe30SRichard Henderson }
88*a11efe30SRichard Henderson 
89*a11efe30SRichard Henderson static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
90*a11efe30SRichard Henderson                         TCGv_vec m, int64_t sh)
91*a11efe30SRichard Henderson {
92*a11efe30SRichard Henderson     tcg_gen_xor_vec(vece, d, n, m);
93*a11efe30SRichard Henderson     tcg_gen_rotri_vec(vece, d, d, sh);
94*a11efe30SRichard Henderson }
95*a11efe30SRichard Henderson 
96*a11efe30SRichard Henderson void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
97*a11efe30SRichard Henderson                   uint32_t rm_ofs, int64_t shift,
98*a11efe30SRichard Henderson                   uint32_t opr_sz, uint32_t max_sz)
99*a11efe30SRichard Henderson {
100*a11efe30SRichard Henderson     static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
101*a11efe30SRichard Henderson     static const GVecGen3i ops[4] = {
102*a11efe30SRichard Henderson         { .fni8 = gen_xar8_i64,
103*a11efe30SRichard Henderson           .fniv = gen_xar_vec,
104*a11efe30SRichard Henderson           .fno = gen_helper_sve2_xar_b,
105*a11efe30SRichard Henderson           .opt_opc = vecop,
106*a11efe30SRichard Henderson           .vece = MO_8 },
107*a11efe30SRichard Henderson         { .fni8 = gen_xar16_i64,
108*a11efe30SRichard Henderson           .fniv = gen_xar_vec,
109*a11efe30SRichard Henderson           .fno = gen_helper_sve2_xar_h,
110*a11efe30SRichard Henderson           .opt_opc = vecop,
111*a11efe30SRichard Henderson           .vece = MO_16 },
112*a11efe30SRichard Henderson         { .fni4 = gen_xar_i32,
113*a11efe30SRichard Henderson           .fniv = gen_xar_vec,
114*a11efe30SRichard Henderson           .fno = gen_helper_sve2_xar_s,
115*a11efe30SRichard Henderson           .opt_opc = vecop,
116*a11efe30SRichard Henderson           .vece = MO_32 },
117*a11efe30SRichard Henderson         { .fni8 = gen_xar_i64,
118*a11efe30SRichard Henderson           .fniv = gen_xar_vec,
119*a11efe30SRichard Henderson           .fno = gen_helper_gvec_xar_d,
120*a11efe30SRichard Henderson           .opt_opc = vecop,
121*a11efe30SRichard Henderson           .vece = MO_64 }
122*a11efe30SRichard Henderson     };
123*a11efe30SRichard Henderson     int esize = 8 << vece;
124*a11efe30SRichard Henderson 
125*a11efe30SRichard Henderson     /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
126*a11efe30SRichard Henderson     tcg_debug_assert(shift >= 0);
127*a11efe30SRichard Henderson     tcg_debug_assert(shift <= esize);
128*a11efe30SRichard Henderson     shift &= esize - 1;
129*a11efe30SRichard Henderson 
130*a11efe30SRichard Henderson     if (shift == 0) {
131*a11efe30SRichard Henderson         /* xar with no rotate devolves to xor. */
132*a11efe30SRichard Henderson         tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
133*a11efe30SRichard Henderson     } else {
134*a11efe30SRichard Henderson         tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
135*a11efe30SRichard Henderson                         shift, &ops[vece]);
136*a11efe30SRichard Henderson     }
137*a11efe30SRichard Henderson }
138*a11efe30SRichard Henderson 
139*a11efe30SRichard Henderson static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
140*a11efe30SRichard Henderson {
141*a11efe30SRichard Henderson     tcg_gen_xor_i64(d, n, m);
142*a11efe30SRichard Henderson     tcg_gen_xor_i64(d, d, k);
143*a11efe30SRichard Henderson }
144*a11efe30SRichard Henderson 
145*a11efe30SRichard Henderson static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
146*a11efe30SRichard Henderson                          TCGv_vec m, TCGv_vec k)
147*a11efe30SRichard Henderson {
148*a11efe30SRichard Henderson     tcg_gen_xor_vec(vece, d, n, m);
149*a11efe30SRichard Henderson     tcg_gen_xor_vec(vece, d, d, k);
150*a11efe30SRichard Henderson }
151*a11efe30SRichard Henderson 
152*a11efe30SRichard Henderson void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
153*a11efe30SRichard Henderson                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
154*a11efe30SRichard Henderson {
155*a11efe30SRichard Henderson     static const GVecGen4 op = {
156*a11efe30SRichard Henderson         .fni8 = gen_eor3_i64,
157*a11efe30SRichard Henderson         .fniv = gen_eor3_vec,
158*a11efe30SRichard Henderson         .fno = gen_helper_sve2_eor3,
159*a11efe30SRichard Henderson         .vece = MO_64,
160*a11efe30SRichard Henderson         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
161*a11efe30SRichard Henderson     };
162*a11efe30SRichard Henderson     tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
163*a11efe30SRichard Henderson }
164*a11efe30SRichard Henderson 
165*a11efe30SRichard Henderson static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
166*a11efe30SRichard Henderson {
167*a11efe30SRichard Henderson     tcg_gen_andc_i64(d, m, k);
168*a11efe30SRichard Henderson     tcg_gen_xor_i64(d, d, n);
169*a11efe30SRichard Henderson }
170*a11efe30SRichard Henderson 
171*a11efe30SRichard Henderson static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
172*a11efe30SRichard Henderson                          TCGv_vec m, TCGv_vec k)
173*a11efe30SRichard Henderson {
174*a11efe30SRichard Henderson     tcg_gen_andc_vec(vece, d, m, k);
175*a11efe30SRichard Henderson     tcg_gen_xor_vec(vece, d, d, n);
176*a11efe30SRichard Henderson }
177*a11efe30SRichard Henderson 
178*a11efe30SRichard Henderson void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
179*a11efe30SRichard Henderson                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
180*a11efe30SRichard Henderson {
181*a11efe30SRichard Henderson     static const GVecGen4 op = {
182*a11efe30SRichard Henderson         .fni8 = gen_bcax_i64,
183*a11efe30SRichard Henderson         .fniv = gen_bcax_vec,
184*a11efe30SRichard Henderson         .fno = gen_helper_sve2_bcax,
185*a11efe30SRichard Henderson         .vece = MO_64,
186*a11efe30SRichard Henderson         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
187*a11efe30SRichard Henderson     };
188*a11efe30SRichard Henderson     tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
189*a11efe30SRichard Henderson }
190*a11efe30SRichard Henderson 
191