xref: /qemu/target/arm/tcg/gengvec64.c (revision 8f6343ae18d745653a4668cc0924016444d76460)
1 /*
2  *  AArch64 generic vector expansion
3  *
4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "translate.h"
22 #include "translate-a64.h"
23 
24 
25 static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
26 {
27     tcg_gen_rotli_i64(d, m, 1);
28     tcg_gen_xor_i64(d, d, n);
29 }
30 
31 static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
32 {
33     tcg_gen_rotli_vec(vece, d, m, 1);
34     tcg_gen_xor_vec(vece, d, d, n);
35 }
36 
37 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
38                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
39 {
40     static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
41     static const GVecGen3 op = {
42         .fni8 = gen_rax1_i64,
43         .fniv = gen_rax1_vec,
44         .opt_opc = vecop_list,
45         .fno = gen_helper_crypto_rax1,
46         .vece = MO_64,
47     };
48     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
49 }
50 
51 static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
52 {
53     TCGv_i64 t = tcg_temp_new_i64();
54     uint64_t mask = dup_const(MO_8, 0xff >> sh);
55 
56     tcg_gen_xor_i64(t, n, m);
57     tcg_gen_shri_i64(d, t, sh);
58     tcg_gen_shli_i64(t, t, 8 - sh);
59     tcg_gen_andi_i64(d, d, mask);
60     tcg_gen_andi_i64(t, t, ~mask);
61     tcg_gen_or_i64(d, d, t);
62 }
63 
64 static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
65 {
66     TCGv_i64 t = tcg_temp_new_i64();
67     uint64_t mask = dup_const(MO_16, 0xffff >> sh);
68 
69     tcg_gen_xor_i64(t, n, m);
70     tcg_gen_shri_i64(d, t, sh);
71     tcg_gen_shli_i64(t, t, 16 - sh);
72     tcg_gen_andi_i64(d, d, mask);
73     tcg_gen_andi_i64(t, t, ~mask);
74     tcg_gen_or_i64(d, d, t);
75 }
76 
77 static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
78 {
79     tcg_gen_xor_i32(d, n, m);
80     tcg_gen_rotri_i32(d, d, sh);
81 }
82 
83 static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
84 {
85     tcg_gen_xor_i64(d, n, m);
86     tcg_gen_rotri_i64(d, d, sh);
87 }
88 
89 static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
90                         TCGv_vec m, int64_t sh)
91 {
92     tcg_gen_xor_vec(vece, d, n, m);
93     tcg_gen_rotri_vec(vece, d, d, sh);
94 }
95 
/*
 * Expand XAR (xor and rotate right) over a gvec region, for element
 * size @vece and rotate amount @shift.  Dispatches per element size:
 * inline vector code when rotli is available, integer fallbacks for
 * MO_8/MO_16 (synthetic lane rotates), MO_32/MO_64 (native rotates),
 * or the out-of-line helpers.
 */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    /* Fold the SVE2 shift == esize (full rotation) case down to 0. */
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}
138 
139 static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
140 {
141     tcg_gen_xor_i64(d, n, m);
142     tcg_gen_xor_i64(d, d, k);
143 }
144 
145 static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
146                          TCGv_vec m, TCGv_vec k)
147 {
148     tcg_gen_xor_vec(vece, d, n, m);
149     tcg_gen_xor_vec(vece, d, d, k);
150 }
151 
152 void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
153                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
154 {
155     static const GVecGen4 op = {
156         .fni8 = gen_eor3_i64,
157         .fniv = gen_eor3_vec,
158         .fno = gen_helper_sve2_eor3,
159         .vece = MO_64,
160         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
161     };
162     tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
163 }
164 
165 static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
166 {
167     tcg_gen_andc_i64(d, m, k);
168     tcg_gen_xor_i64(d, d, n);
169 }
170 
171 static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
172                          TCGv_vec m, TCGv_vec k)
173 {
174     tcg_gen_andc_vec(vece, d, m, k);
175     tcg_gen_xor_vec(vece, d, d, n);
176 }
177 
178 void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
179                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
180 {
181     static const GVecGen4 op = {
182         .fni8 = gen_bcax_i64,
183         .fniv = gen_bcax_vec,
184         .fno = gen_helper_sve2_bcax,
185         .vece = MO_64,
186         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
187     };
188     tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
189 }
190 
191 static void gen_suqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
192                            TCGv_vec a, TCGv_vec b)
193 {
194     TCGv_vec max =
195         tcg_constant_vec_matching(t, vece, (1ull << ((8 << vece) - 1)) - 1);
196     TCGv_vec u = tcg_temp_new_vec_matching(t);
197 
198     /* Maximum value that can be added to @a without overflow. */
199     tcg_gen_sub_vec(vece, u, max, a);
200 
201     /* Constrain addend so that the next addition never overflows. */
202     tcg_gen_umin_vec(vece, u, u, b);
203     tcg_gen_add_vec(vece, t, u, a);
204 
205     /* Compute QC by comparing the adjusted @b. */
206     tcg_gen_xor_vec(vece, u, u, b);
207     tcg_gen_or_vec(vece, qc, qc, u);
208 }
209 
210 void gen_gvec_suqadd_qc(unsigned vece, uint32_t rd_ofs,
211                         uint32_t rn_ofs, uint32_t rm_ofs,
212                         uint32_t opr_sz, uint32_t max_sz)
213 {
214     static const TCGOpcode vecop_list[] = {
215         INDEX_op_add_vec, INDEX_op_sub_vec, INDEX_op_umin_vec, 0
216     };
217     static const GVecGen4 ops[4] = {
218         { .fniv = gen_suqadd_vec,
219           .fno = gen_helper_gvec_suqadd_b,
220           .opt_opc = vecop_list,
221           .write_aofs = true,
222           .vece = MO_8 },
223         { .fniv = gen_suqadd_vec,
224           .fno = gen_helper_gvec_suqadd_h,
225           .opt_opc = vecop_list,
226           .write_aofs = true,
227           .vece = MO_16 },
228         { .fniv = gen_suqadd_vec,
229           .fno = gen_helper_gvec_suqadd_s,
230           .opt_opc = vecop_list,
231           .write_aofs = true,
232           .vece = MO_32 },
233         { .fniv = gen_suqadd_vec,
234           .fno = gen_helper_gvec_suqadd_d,
235           .opt_opc = vecop_list,
236           .write_aofs = true,
237           .vece = MO_64 },
238     };
239 
240     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
241     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
242                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
243 }
244 
245 static void gen_usqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
246                            TCGv_vec a, TCGv_vec b)
247 {
248     TCGv_vec u = tcg_temp_new_vec_matching(t);
249     TCGv_vec z = tcg_constant_vec_matching(t, vece, 0);
250 
251     /* Compute unsigned saturation of add for +b and sub for -b. */
252     tcg_gen_neg_vec(vece, t, b);
253     tcg_gen_usadd_vec(vece, u, a, b);
254     tcg_gen_ussub_vec(vece, t, a, t);
255 
256     /* Select the correct result depending on the sign of b. */
257     tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, b, z, t, u);
258 
259     /* Compute QC by comparing against the non-saturated result. */
260     tcg_gen_add_vec(vece, u, a, b);
261     tcg_gen_xor_vec(vece, u, u, t);
262     tcg_gen_or_vec(vece, qc, qc, u);
263 }
264 
265 void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs,
266                         uint32_t rn_ofs, uint32_t rm_ofs,
267                         uint32_t opr_sz, uint32_t max_sz)
268 {
269     static const TCGOpcode vecop_list[] = {
270         INDEX_op_neg_vec, INDEX_op_add_vec,
271         INDEX_op_usadd_vec, INDEX_op_ussub_vec,
272         INDEX_op_cmpsel_vec, 0
273     };
274     static const GVecGen4 ops[4] = {
275         { .fniv = gen_usqadd_vec,
276           .fno = gen_helper_gvec_usqadd_b,
277           .opt_opc = vecop_list,
278           .write_aofs = true,
279           .vece = MO_8 },
280         { .fniv = gen_usqadd_vec,
281           .fno = gen_helper_gvec_usqadd_h,
282           .opt_opc = vecop_list,
283           .write_aofs = true,
284           .vece = MO_16 },
285         { .fniv = gen_usqadd_vec,
286           .fno = gen_helper_gvec_usqadd_s,
287           .opt_opc = vecop_list,
288           .write_aofs = true,
289           .vece = MO_32 },
290         { .fniv = gen_usqadd_vec,
291           .fno = gen_helper_gvec_usqadd_d,
292           .opt_opc = vecop_list,
293           .write_aofs = true,
294           .vece = MO_64 },
295     };
296 
297     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
298     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
299                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
300 }
301