xref: /qemu/target/s390x/tcg/vec_int_helper.c (revision 2d8bc6815e9a1c81f0aa0a1365c4d9a1e0cb0a81)
/*
 * QEMU TCG support -- s390x vector integer instruction support
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
12c1a81d4bSDavid Hildenbrand #include "qemu/osdep.h"
13c1a81d4bSDavid Hildenbrand #include "cpu.h"
14c1a81d4bSDavid Hildenbrand #include "vec.h"
15c1a81d4bSDavid Hildenbrand #include "exec/helper-proto.h"
165c4b0ab4SDavid Hildenbrand #include "tcg/tcg-gvec-desc.h"
17*2d8bc681SRichard Henderson #include "crypto/clmul.h"
18c1a81d4bSDavid Hildenbrand 
19697a45d6SDavid Hildenbrand static bool s390_vec_is_zero(const S390Vector *v)
20697a45d6SDavid Hildenbrand {
21697a45d6SDavid Hildenbrand     return !v->doubleword[0] && !v->doubleword[1];
22697a45d6SDavid Hildenbrand }
23697a45d6SDavid Hildenbrand 
24697a45d6SDavid Hildenbrand static void s390_vec_xor(S390Vector *res, const S390Vector *a,
25697a45d6SDavid Hildenbrand                          const S390Vector *b)
26697a45d6SDavid Hildenbrand {
27697a45d6SDavid Hildenbrand     res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0];
28697a45d6SDavid Hildenbrand     res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1];
29697a45d6SDavid Hildenbrand }
30697a45d6SDavid Hildenbrand 
31db156ebfSDavid Hildenbrand static void s390_vec_and(S390Vector *res, const S390Vector *a,
32db156ebfSDavid Hildenbrand                          const S390Vector *b)
33db156ebfSDavid Hildenbrand {
34db156ebfSDavid Hildenbrand     res->doubleword[0] = a->doubleword[0] & b->doubleword[0];
35db156ebfSDavid Hildenbrand     res->doubleword[1] = a->doubleword[1] & b->doubleword[1];
36db156ebfSDavid Hildenbrand }
37db156ebfSDavid Hildenbrand 
38db156ebfSDavid Hildenbrand static bool s390_vec_equal(const S390Vector *a, const S390Vector *b)
39db156ebfSDavid Hildenbrand {
40db156ebfSDavid Hildenbrand     return a->doubleword[0] == b->doubleword[0] &&
41db156ebfSDavid Hildenbrand            a->doubleword[1] == b->doubleword[1];
42db156ebfSDavid Hildenbrand }
43db156ebfSDavid Hildenbrand 
44697a45d6SDavid Hildenbrand static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
45697a45d6SDavid Hildenbrand {
46697a45d6SDavid Hildenbrand     uint64_t tmp;
47697a45d6SDavid Hildenbrand 
48697a45d6SDavid Hildenbrand     g_assert(count < 128);
49697a45d6SDavid Hildenbrand     if (count == 0) {
50697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[0];
51697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[1];
52697a45d6SDavid Hildenbrand     } else if (count == 64) {
53697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[1];
54697a45d6SDavid Hildenbrand         d->doubleword[1] = 0;
55697a45d6SDavid Hildenbrand     } else if (count < 64) {
56697a45d6SDavid Hildenbrand         tmp = extract64(a->doubleword[1], 64 - count, count);
57697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[1] << count;
58697a45d6SDavid Hildenbrand         d->doubleword[0] = (a->doubleword[0] << count) | tmp;
59697a45d6SDavid Hildenbrand     } else {
60697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[1] << (count - 64);
61697a45d6SDavid Hildenbrand         d->doubleword[1] = 0;
62697a45d6SDavid Hildenbrand     }
63697a45d6SDavid Hildenbrand }
64697a45d6SDavid Hildenbrand 
655f724887SDavid Hildenbrand static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count)
665f724887SDavid Hildenbrand {
675f724887SDavid Hildenbrand     uint64_t tmp;
685f724887SDavid Hildenbrand 
695f724887SDavid Hildenbrand     if (count == 0) {
705f724887SDavid Hildenbrand         d->doubleword[0] = a->doubleword[0];
715f724887SDavid Hildenbrand         d->doubleword[1] = a->doubleword[1];
725f724887SDavid Hildenbrand     } else if (count == 64) {
73b57b3368SDavid Hildenbrand         tmp = (int64_t)a->doubleword[0] >> 63;
745f724887SDavid Hildenbrand         d->doubleword[1] = a->doubleword[0];
75b57b3368SDavid Hildenbrand         d->doubleword[0] = tmp;
765f724887SDavid Hildenbrand     } else if (count < 64) {
775f724887SDavid Hildenbrand         tmp = a->doubleword[1] >> count;
785f724887SDavid Hildenbrand         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
795f724887SDavid Hildenbrand         d->doubleword[0] = (int64_t)a->doubleword[0] >> count;
805f724887SDavid Hildenbrand     } else {
81b57b3368SDavid Hildenbrand         tmp = (int64_t)a->doubleword[0] >> 63;
825f724887SDavid Hildenbrand         d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64);
83b57b3368SDavid Hildenbrand         d->doubleword[0] = tmp;
845f724887SDavid Hildenbrand     }
855f724887SDavid Hildenbrand }
865f724887SDavid Hildenbrand 
87697a45d6SDavid Hildenbrand static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
88697a45d6SDavid Hildenbrand {
89697a45d6SDavid Hildenbrand     uint64_t tmp;
90697a45d6SDavid Hildenbrand 
91697a45d6SDavid Hildenbrand     g_assert(count < 128);
92697a45d6SDavid Hildenbrand     if (count == 0) {
93697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[0];
94697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[1];
95697a45d6SDavid Hildenbrand     } else if (count == 64) {
96697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[0];
97697a45d6SDavid Hildenbrand         d->doubleword[0] = 0;
98697a45d6SDavid Hildenbrand     } else if (count < 64) {
99697a45d6SDavid Hildenbrand         tmp = a->doubleword[1] >> count;
100697a45d6SDavid Hildenbrand         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
101697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[0] >> count;
102697a45d6SDavid Hildenbrand     } else {
103697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[0] >> (count - 64);
104697a45d6SDavid Hildenbrand         d->doubleword[0] = 0;
105697a45d6SDavid Hildenbrand     }
106697a45d6SDavid Hildenbrand }
107c1a81d4bSDavid Hildenbrand #define DEF_VAVG(BITS)                                                         \
108c1a81d4bSDavid Hildenbrand void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3,         \
109c1a81d4bSDavid Hildenbrand                              uint32_t desc)                                    \
110c1a81d4bSDavid Hildenbrand {                                                                              \
111c1a81d4bSDavid Hildenbrand     int i;                                                                     \
112c1a81d4bSDavid Hildenbrand                                                                                \
113c1a81d4bSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
114c1a81d4bSDavid Hildenbrand         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
115c1a81d4bSDavid Hildenbrand         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
116c1a81d4bSDavid Hildenbrand                                                                                \
117c1a81d4bSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
118c1a81d4bSDavid Hildenbrand     }                                                                          \
119c1a81d4bSDavid Hildenbrand }
120c1a81d4bSDavid Hildenbrand DEF_VAVG(8)
121c1a81d4bSDavid Hildenbrand DEF_VAVG(16)
122801aa78bSDavid Hildenbrand 
123801aa78bSDavid Hildenbrand #define DEF_VAVGL(BITS)                                                        \
124801aa78bSDavid Hildenbrand void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3,        \
125801aa78bSDavid Hildenbrand                               uint32_t desc)                                   \
126801aa78bSDavid Hildenbrand {                                                                              \
127801aa78bSDavid Hildenbrand     int i;                                                                     \
128801aa78bSDavid Hildenbrand                                                                                \
129801aa78bSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
130801aa78bSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
131801aa78bSDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
132801aa78bSDavid Hildenbrand                                                                                \
133801aa78bSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
134801aa78bSDavid Hildenbrand     }                                                                          \
135801aa78bSDavid Hildenbrand }
136801aa78bSDavid Hildenbrand DEF_VAVGL(8)
137801aa78bSDavid Hildenbrand DEF_VAVGL(16)
13828863f1dSDavid Hildenbrand 
13928863f1dSDavid Hildenbrand #define DEF_VCLZ(BITS)                                                         \
14028863f1dSDavid Hildenbrand void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc)          \
14128863f1dSDavid Hildenbrand {                                                                              \
14228863f1dSDavid Hildenbrand     int i;                                                                     \
14328863f1dSDavid Hildenbrand                                                                                \
14428863f1dSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
14528863f1dSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
14628863f1dSDavid Hildenbrand                                                                                \
14728863f1dSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS);             \
14828863f1dSDavid Hildenbrand     }                                                                          \
14928863f1dSDavid Hildenbrand }
15028863f1dSDavid Hildenbrand DEF_VCLZ(8)
15128863f1dSDavid Hildenbrand DEF_VCLZ(16)
152449a8ac2SDavid Hildenbrand 
153449a8ac2SDavid Hildenbrand #define DEF_VCTZ(BITS)                                                         \
154449a8ac2SDavid Hildenbrand void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc)          \
155449a8ac2SDavid Hildenbrand {                                                                              \
156449a8ac2SDavid Hildenbrand     int i;                                                                     \
157449a8ac2SDavid Hildenbrand                                                                                \
158449a8ac2SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
159449a8ac2SDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
160449a8ac2SDavid Hildenbrand                                                                                \
161449a8ac2SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS);              \
162449a8ac2SDavid Hildenbrand     }                                                                          \
163449a8ac2SDavid Hildenbrand }
164449a8ac2SDavid Hildenbrand DEF_VCTZ(8)
165449a8ac2SDavid Hildenbrand DEF_VCTZ(16)
166697a45d6SDavid Hildenbrand 
/*
 * Carry-less multiplication: shift-and-add binary multiplication in which
 * the partial products are combined with XOR instead of addition.  The
 * operands are passed and the product returned in a TBITS-wide type so the
 * product of two BITS-wide values fits.
 */
#define DEF_GALOIS_MULTIPLY(BITS, TBITS) \
static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \
                                             uint##TBITS##_t b) \
{ \
    uint##TBITS##_t product = 0; \
 \
    for (; b != 0; b >>= 1, a <<= 1) { \
        if (b & 0x1) { \
            product ^= a; \
        } \
    } \
    return product; \
}
DEF_GALOIS_MULTIPLY(16, 32)
DEF_GALOIS_MULTIPLY(32, 64)
185697a45d6SDavid Hildenbrand 
186697a45d6SDavid Hildenbrand static S390Vector galois_multiply64(uint64_t a, uint64_t b)
187697a45d6SDavid Hildenbrand {
188697a45d6SDavid Hildenbrand     S390Vector res = {};
189697a45d6SDavid Hildenbrand     S390Vector va = {
190697a45d6SDavid Hildenbrand         .doubleword[1] = a,
191697a45d6SDavid Hildenbrand     };
192697a45d6SDavid Hildenbrand     S390Vector vb = {
193697a45d6SDavid Hildenbrand         .doubleword[1] = b,
194697a45d6SDavid Hildenbrand     };
195697a45d6SDavid Hildenbrand 
196697a45d6SDavid Hildenbrand     while (!s390_vec_is_zero(&vb)) {
197697a45d6SDavid Hildenbrand         if (vb.doubleword[1] & 0x1) {
198697a45d6SDavid Hildenbrand             s390_vec_xor(&res, &res, &va);
199697a45d6SDavid Hildenbrand         }
200697a45d6SDavid Hildenbrand         s390_vec_shl(&va, &va, 1);
201697a45d6SDavid Hildenbrand         s390_vec_shr(&vb, &vb, 1);
202697a45d6SDavid Hildenbrand     }
203697a45d6SDavid Hildenbrand     return res;
204697a45d6SDavid Hildenbrand }
205697a45d6SDavid Hildenbrand 
/*
 * Carry-less multiply-and-accumulate on one doubleword of byte elements:
 * the products of the even and of the odd byte pairs of @n and @m are
 * XORed together and then XORed with the accumulator @a.
 *
 * Nothing carries from one doubleword into the other, so the order in
 * which callers process the two doublewords is irrelevant.  Registers
 * never overlap partially either.
 */
static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_8x4_even(n, m);
    const uint64_t odd = clmul_8x4_odd(n, m);

    return even ^ odd ^ a;
}
214*2d8bc681SRichard Henderson 
215*2d8bc681SRichard Henderson void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d)
216*2d8bc681SRichard Henderson {
217*2d8bc681SRichard Henderson     uint64_t *q1 = v1;
218*2d8bc681SRichard Henderson     const uint64_t *q2 = v2, *q3 = v3;
219*2d8bc681SRichard Henderson 
220*2d8bc681SRichard Henderson     q1[0] = do_gfma8(q2[0], q3[0], 0);
221*2d8bc681SRichard Henderson     q1[1] = do_gfma8(q2[1], q3[1], 0);
222*2d8bc681SRichard Henderson }
223*2d8bc681SRichard Henderson 
224*2d8bc681SRichard Henderson void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3,
225*2d8bc681SRichard Henderson                          const void *v4, uint32_t desc)
226*2d8bc681SRichard Henderson {
227*2d8bc681SRichard Henderson     uint64_t *q1 = v1;
228*2d8bc681SRichard Henderson     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
229*2d8bc681SRichard Henderson 
230*2d8bc681SRichard Henderson     q1[0] = do_gfma8(q2[0], q3[0], q4[0]);
231*2d8bc681SRichard Henderson     q1[1] = do_gfma8(q2[1], q3[1], q4[1]);
232*2d8bc681SRichard Henderson }
233*2d8bc681SRichard Henderson 
234697a45d6SDavid Hildenbrand #define DEF_VGFM(BITS, TBITS)                                                  \
235697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3,         \
236697a45d6SDavid Hildenbrand                              uint32_t desc)                                    \
237697a45d6SDavid Hildenbrand {                                                                              \
238697a45d6SDavid Hildenbrand     int i;                                                                     \
239697a45d6SDavid Hildenbrand                                                                                \
240697a45d6SDavid Hildenbrand     for (i = 0; i < (128 / TBITS); i++) {                                      \
241697a45d6SDavid Hildenbrand         uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
242697a45d6SDavid Hildenbrand         uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
243697a45d6SDavid Hildenbrand         uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
244697a45d6SDavid Hildenbrand                                                                                \
245697a45d6SDavid Hildenbrand         a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
246697a45d6SDavid Hildenbrand         b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
247697a45d6SDavid Hildenbrand         d = d ^ galois_multiply32(a, b);                                       \
248697a45d6SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, d);                               \
249697a45d6SDavid Hildenbrand     }                                                                          \
250697a45d6SDavid Hildenbrand }
251697a45d6SDavid Hildenbrand DEF_VGFM(16, 32)
252697a45d6SDavid Hildenbrand DEF_VGFM(32, 64)
253697a45d6SDavid Hildenbrand 
254697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
255697a45d6SDavid Hildenbrand                          uint32_t desc)
256697a45d6SDavid Hildenbrand {
257697a45d6SDavid Hildenbrand     S390Vector tmp1, tmp2;
258697a45d6SDavid Hildenbrand     uint64_t a, b;
259697a45d6SDavid Hildenbrand 
260697a45d6SDavid Hildenbrand     a = s390_vec_read_element64(v2, 0);
261697a45d6SDavid Hildenbrand     b = s390_vec_read_element64(v3, 0);
262697a45d6SDavid Hildenbrand     tmp1 = galois_multiply64(a, b);
263697a45d6SDavid Hildenbrand     a = s390_vec_read_element64(v2, 1);
264697a45d6SDavid Hildenbrand     b = s390_vec_read_element64(v3, 1);
265697a45d6SDavid Hildenbrand     tmp2 = galois_multiply64(a, b);
266697a45d6SDavid Hildenbrand     s390_vec_xor(v1, &tmp1, &tmp2);
267697a45d6SDavid Hildenbrand }
268697a45d6SDavid Hildenbrand 
269697a45d6SDavid Hildenbrand #define DEF_VGFMA(BITS, TBITS)                                                 \
270697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3,        \
271697a45d6SDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
272697a45d6SDavid Hildenbrand {                                                                              \
273697a45d6SDavid Hildenbrand     int i;                                                                     \
274697a45d6SDavid Hildenbrand                                                                                \
275697a45d6SDavid Hildenbrand     for (i = 0; i < (128 / TBITS); i++) {                                      \
276697a45d6SDavid Hildenbrand         uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
277697a45d6SDavid Hildenbrand         uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
278697a45d6SDavid Hildenbrand         uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
279697a45d6SDavid Hildenbrand                                                                                \
280697a45d6SDavid Hildenbrand         a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
281697a45d6SDavid Hildenbrand         b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
282697a45d6SDavid Hildenbrand         d = d ^ galois_multiply32(a, b);                                       \
283697a45d6SDavid Hildenbrand         d = d ^ s390_vec_read_element##TBITS(v4, i);                           \
284697a45d6SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, d);                               \
285697a45d6SDavid Hildenbrand     }                                                                          \
286697a45d6SDavid Hildenbrand }
287697a45d6SDavid Hildenbrand DEF_VGFMA(16, 32)
288697a45d6SDavid Hildenbrand DEF_VGFMA(32, 64)
289697a45d6SDavid Hildenbrand 
290697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
291697a45d6SDavid Hildenbrand                           const void *v4, uint32_t desc)
292697a45d6SDavid Hildenbrand {
293697a45d6SDavid Hildenbrand     S390Vector tmp1, tmp2;
294697a45d6SDavid Hildenbrand     uint64_t a, b;
295697a45d6SDavid Hildenbrand 
296697a45d6SDavid Hildenbrand     a = s390_vec_read_element64(v2, 0);
297697a45d6SDavid Hildenbrand     b = s390_vec_read_element64(v3, 0);
298697a45d6SDavid Hildenbrand     tmp1 = galois_multiply64(a, b);
299697a45d6SDavid Hildenbrand     a = s390_vec_read_element64(v2, 1);
300697a45d6SDavid Hildenbrand     b = s390_vec_read_element64(v3, 1);
301697a45d6SDavid Hildenbrand     tmp2 = galois_multiply64(a, b);
302697a45d6SDavid Hildenbrand     s390_vec_xor(&tmp1, &tmp1, &tmp2);
303697a45d6SDavid Hildenbrand     s390_vec_xor(v1, &tmp1, v4);
304697a45d6SDavid Hildenbrand }
3051b430aecSDavid Hildenbrand 
3061b430aecSDavid Hildenbrand #define DEF_VMAL(BITS)                                                         \
3071b430aecSDavid Hildenbrand void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3,         \
3081b430aecSDavid Hildenbrand                              const void *v4, uint32_t desc)                    \
3091b430aecSDavid Hildenbrand {                                                                              \
3101b430aecSDavid Hildenbrand     int i;                                                                     \
3111b430aecSDavid Hildenbrand                                                                                \
3121b430aecSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
3131b430aecSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
3141b430aecSDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
3151b430aecSDavid Hildenbrand         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
3161b430aecSDavid Hildenbrand                                                                                \
3171b430aecSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, a * b + c);                        \
3181b430aecSDavid Hildenbrand     }                                                                          \
3191b430aecSDavid Hildenbrand }
3201b430aecSDavid Hildenbrand DEF_VMAL(8)
3211b430aecSDavid Hildenbrand DEF_VMAL(16)
3221b430aecSDavid Hildenbrand 
3231b430aecSDavid Hildenbrand #define DEF_VMAH(BITS)                                                         \
3241b430aecSDavid Hildenbrand void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3,         \
3251b430aecSDavid Hildenbrand                              const void *v4, uint32_t desc)                    \
3261b430aecSDavid Hildenbrand {                                                                              \
3271b430aecSDavid Hildenbrand     int i;                                                                     \
3281b430aecSDavid Hildenbrand                                                                                \
3291b430aecSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
3301b430aecSDavid Hildenbrand         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
3311b430aecSDavid Hildenbrand         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
3321b430aecSDavid Hildenbrand         const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i);   \
3331b430aecSDavid Hildenbrand                                                                                \
3341b430aecSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
3351b430aecSDavid Hildenbrand     }                                                                          \
3361b430aecSDavid Hildenbrand }
3371b430aecSDavid Hildenbrand DEF_VMAH(8)
3381b430aecSDavid Hildenbrand DEF_VMAH(16)
3391b430aecSDavid Hildenbrand 
3401b430aecSDavid Hildenbrand #define DEF_VMALH(BITS)                                                        \
3411b430aecSDavid Hildenbrand void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3,        \
3421b430aecSDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
3431b430aecSDavid Hildenbrand {                                                                              \
3441b430aecSDavid Hildenbrand     int i;                                                                     \
3451b430aecSDavid Hildenbrand                                                                                \
3461b430aecSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
3471b430aecSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
3481b430aecSDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
3491b430aecSDavid Hildenbrand         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
3501b430aecSDavid Hildenbrand                                                                                \
3511b430aecSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
3521b430aecSDavid Hildenbrand     }                                                                          \
3531b430aecSDavid Hildenbrand }
3541b430aecSDavid Hildenbrand DEF_VMALH(8)
3551b430aecSDavid Hildenbrand DEF_VMALH(16)
3561b430aecSDavid Hildenbrand 
3571b430aecSDavid Hildenbrand #define DEF_VMAE(BITS, TBITS)                                                  \
3581b430aecSDavid Hildenbrand void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3,         \
3591b430aecSDavid Hildenbrand                              const void *v4, uint32_t desc)                    \
3601b430aecSDavid Hildenbrand {                                                                              \
3611b430aecSDavid Hildenbrand     int i, j;                                                                  \
3621b430aecSDavid Hildenbrand                                                                                \
3631b430aecSDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
3641b430aecSDavid Hildenbrand         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
3651b430aecSDavid Hildenbrand         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
3668b952519SDavid Hildenbrand         int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
3671b430aecSDavid Hildenbrand                                                                                \
3681b430aecSDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
3691b430aecSDavid Hildenbrand     }                                                                          \
3701b430aecSDavid Hildenbrand }
3711b430aecSDavid Hildenbrand DEF_VMAE(8, 16)
3721b430aecSDavid Hildenbrand DEF_VMAE(16, 32)
3731b430aecSDavid Hildenbrand DEF_VMAE(32, 64)
3741b430aecSDavid Hildenbrand 
3751b430aecSDavid Hildenbrand #define DEF_VMALE(BITS, TBITS)                                                 \
3761b430aecSDavid Hildenbrand void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3,        \
3771b430aecSDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
3781b430aecSDavid Hildenbrand {                                                                              \
3791b430aecSDavid Hildenbrand     int i, j;                                                                  \
3801b430aecSDavid Hildenbrand                                                                                \
3811b430aecSDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
3821b430aecSDavid Hildenbrand         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
3831b430aecSDavid Hildenbrand         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
3848b952519SDavid Hildenbrand         uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
3851b430aecSDavid Hildenbrand                                                                                \
3861b430aecSDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
3871b430aecSDavid Hildenbrand     }                                                                          \
3881b430aecSDavid Hildenbrand }
3891b430aecSDavid Hildenbrand DEF_VMALE(8, 16)
3901b430aecSDavid Hildenbrand DEF_VMALE(16, 32)
3911b430aecSDavid Hildenbrand DEF_VMALE(32, 64)
3921b430aecSDavid Hildenbrand 
3931b430aecSDavid Hildenbrand #define DEF_VMAO(BITS, TBITS)                                                  \
3941b430aecSDavid Hildenbrand void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3,         \
3951b430aecSDavid Hildenbrand                              const void *v4, uint32_t desc)                    \
3961b430aecSDavid Hildenbrand {                                                                              \
3971b430aecSDavid Hildenbrand     int i, j;                                                                  \
3981b430aecSDavid Hildenbrand                                                                                \
3991b430aecSDavid Hildenbrand     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
4001b430aecSDavid Hildenbrand         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
4011b430aecSDavid Hildenbrand         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
4028b952519SDavid Hildenbrand         int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
4031b430aecSDavid Hildenbrand                                                                                \
4041b430aecSDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
4051b430aecSDavid Hildenbrand     }                                                                          \
4061b430aecSDavid Hildenbrand }
4071b430aecSDavid Hildenbrand DEF_VMAO(8, 16)
4081b430aecSDavid Hildenbrand DEF_VMAO(16, 32)
4091b430aecSDavid Hildenbrand DEF_VMAO(32, 64)
4101b430aecSDavid Hildenbrand 
4111b430aecSDavid Hildenbrand #define DEF_VMALO(BITS, TBITS)                                                 \
4121b430aecSDavid Hildenbrand void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3,        \
4131b430aecSDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
4141b430aecSDavid Hildenbrand {                                                                              \
4151b430aecSDavid Hildenbrand     int i, j;                                                                  \
4161b430aecSDavid Hildenbrand                                                                                \
4171b430aecSDavid Hildenbrand     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
4181b430aecSDavid Hildenbrand         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
4191b430aecSDavid Hildenbrand         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
4208b952519SDavid Hildenbrand         uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
4211b430aecSDavid Hildenbrand                                                                                \
4221b430aecSDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
4231b430aecSDavid Hildenbrand     }                                                                          \
4241b430aecSDavid Hildenbrand }
4251b430aecSDavid Hildenbrand DEF_VMALO(8, 16)
4261b430aecSDavid Hildenbrand DEF_VMALO(16, 32)
4271b430aecSDavid Hildenbrand DEF_VMALO(32, 64)
4282bf3ee38SDavid Hildenbrand 
4292bf3ee38SDavid Hildenbrand #define DEF_VMH(BITS)                                                          \
4302bf3ee38SDavid Hildenbrand void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3,          \
4312bf3ee38SDavid Hildenbrand                             uint32_t desc)                                     \
4322bf3ee38SDavid Hildenbrand {                                                                              \
4332bf3ee38SDavid Hildenbrand     int i;                                                                     \
4342bf3ee38SDavid Hildenbrand                                                                                \
4352bf3ee38SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
4362bf3ee38SDavid Hildenbrand         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
4372bf3ee38SDavid Hildenbrand         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
4382bf3ee38SDavid Hildenbrand                                                                                \
4392bf3ee38SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
4402bf3ee38SDavid Hildenbrand     }                                                                          \
4412bf3ee38SDavid Hildenbrand }
4422bf3ee38SDavid Hildenbrand DEF_VMH(8)
4432bf3ee38SDavid Hildenbrand DEF_VMH(16)
4442bf3ee38SDavid Hildenbrand 
4452bf3ee38SDavid Hildenbrand #define DEF_VMLH(BITS)                                                         \
4462bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3,         \
4472bf3ee38SDavid Hildenbrand                              uint32_t desc)                                    \
4482bf3ee38SDavid Hildenbrand {                                                                              \
4492bf3ee38SDavid Hildenbrand     int i;                                                                     \
4502bf3ee38SDavid Hildenbrand                                                                                \
4512bf3ee38SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
4522bf3ee38SDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
4532bf3ee38SDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
4542bf3ee38SDavid Hildenbrand                                                                                \
4552bf3ee38SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
4562bf3ee38SDavid Hildenbrand     }                                                                          \
4572bf3ee38SDavid Hildenbrand }
4582bf3ee38SDavid Hildenbrand DEF_VMLH(8)
4592bf3ee38SDavid Hildenbrand DEF_VMLH(16)
4602bf3ee38SDavid Hildenbrand 
4612bf3ee38SDavid Hildenbrand #define DEF_VME(BITS, TBITS)                                                   \
4622bf3ee38SDavid Hildenbrand void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3,          \
4632bf3ee38SDavid Hildenbrand                             uint32_t desc)                                     \
4642bf3ee38SDavid Hildenbrand {                                                                              \
4652bf3ee38SDavid Hildenbrand     int i, j;                                                                  \
4662bf3ee38SDavid Hildenbrand                                                                                \
4672bf3ee38SDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
4682bf3ee38SDavid Hildenbrand         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
4692bf3ee38SDavid Hildenbrand         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
4702bf3ee38SDavid Hildenbrand                                                                                \
4712bf3ee38SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b);                           \
4722bf3ee38SDavid Hildenbrand     }                                                                          \
4732bf3ee38SDavid Hildenbrand }
4742bf3ee38SDavid Hildenbrand DEF_VME(8, 16)
4752bf3ee38SDavid Hildenbrand DEF_VME(16, 32)
4762bf3ee38SDavid Hildenbrand DEF_VME(32, 64)
4772bf3ee38SDavid Hildenbrand 
4782bf3ee38SDavid Hildenbrand #define DEF_VMLE(BITS, TBITS)                                                  \
4792bf3ee38SDavid Hildenbrand void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3,         \
4802bf3ee38SDavid Hildenbrand                              uint32_t desc)                                    \
4812bf3ee38SDavid Hildenbrand {                                                                              \
4822bf3ee38SDavid Hildenbrand     int i, j;                                                                  \
4832bf3ee38SDavid Hildenbrand                                                                                \
4842bf3ee38SDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
4852bf3ee38SDavid Hildenbrand         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
4862bf3ee38SDavid Hildenbrand         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
4872bf3ee38SDavid Hildenbrand                                                                                \
4882bf3ee38SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b);                           \
4892bf3ee38SDavid Hildenbrand     }                                                                          \
4902bf3ee38SDavid Hildenbrand }
4912bf3ee38SDavid Hildenbrand DEF_VMLE(8, 16)
4922bf3ee38SDavid Hildenbrand DEF_VMLE(16, 32)
4932bf3ee38SDavid Hildenbrand DEF_VMLE(32, 64)
4942bf3ee38SDavid Hildenbrand 
4952bf3ee38SDavid Hildenbrand #define DEF_VMO(BITS, TBITS)                                                   \
4962bf3ee38SDavid Hildenbrand void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3,          \
4972bf3ee38SDavid Hildenbrand                             uint32_t desc)                                     \
4982bf3ee38SDavid Hildenbrand {                                                                              \
4992bf3ee38SDavid Hildenbrand     int i, j;                                                                  \
5002bf3ee38SDavid Hildenbrand                                                                                \
5012bf3ee38SDavid Hildenbrand     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
5022bf3ee38SDavid Hildenbrand         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
5032bf3ee38SDavid Hildenbrand         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
5042bf3ee38SDavid Hildenbrand                                                                                \
5052bf3ee38SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b);                           \
5062bf3ee38SDavid Hildenbrand     }                                                                          \
5072bf3ee38SDavid Hildenbrand }
5082bf3ee38SDavid Hildenbrand DEF_VMO(8, 16)
5092bf3ee38SDavid Hildenbrand DEF_VMO(16, 32)
5102bf3ee38SDavid Hildenbrand DEF_VMO(32, 64)
5112bf3ee38SDavid Hildenbrand 
5122bf3ee38SDavid Hildenbrand #define DEF_VMLO(BITS, TBITS)                                                  \
5132bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3,         \
5142bf3ee38SDavid Hildenbrand                              uint32_t desc)                                    \
5152bf3ee38SDavid Hildenbrand {                                                                              \
5162bf3ee38SDavid Hildenbrand     int i, j;                                                                  \
5172bf3ee38SDavid Hildenbrand                                                                                \
51849a7ce4eSDavid Hildenbrand     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
5192bf3ee38SDavid Hildenbrand         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
5202bf3ee38SDavid Hildenbrand         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
5212bf3ee38SDavid Hildenbrand                                                                                \
5222bf3ee38SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b);                           \
5232bf3ee38SDavid Hildenbrand     }                                                                          \
5242bf3ee38SDavid Hildenbrand }
5252bf3ee38SDavid Hildenbrand DEF_VMLO(8, 16)
5262bf3ee38SDavid Hildenbrand DEF_VMLO(16, 32)
5272bf3ee38SDavid Hildenbrand DEF_VMLO(32, 64)
528c3838aaaSDavid Hildenbrand 
529c3838aaaSDavid Hildenbrand #define DEF_VPOPCT(BITS)                                                       \
530c3838aaaSDavid Hildenbrand void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc)        \
531c3838aaaSDavid Hildenbrand {                                                                              \
532c3838aaaSDavid Hildenbrand     int i;                                                                     \
533c3838aaaSDavid Hildenbrand                                                                                \
534c3838aaaSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
535c3838aaaSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
536c3838aaaSDavid Hildenbrand                                                                                \
537c3838aaaSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, ctpop32(a));                       \
538c3838aaaSDavid Hildenbrand     }                                                                          \
539c3838aaaSDavid Hildenbrand }
540c3838aaaSDavid Hildenbrand DEF_VPOPCT(8)
541c3838aaaSDavid Hildenbrand DEF_VPOPCT(16)
54255236da2SDavid Hildenbrand 
5435c4b0ab4SDavid Hildenbrand #define DEF_VERIM(BITS)                                                        \
5445c4b0ab4SDavid Hildenbrand void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3,        \
5455c4b0ab4SDavid Hildenbrand                               uint32_t desc)                                   \
5465c4b0ab4SDavid Hildenbrand {                                                                              \
5475c4b0ab4SDavid Hildenbrand     const uint8_t count = simd_data(desc);                                     \
5485c4b0ab4SDavid Hildenbrand     int i;                                                                     \
5495c4b0ab4SDavid Hildenbrand                                                                                \
5505c4b0ab4SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
5515c4b0ab4SDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i);           \
5525c4b0ab4SDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i);           \
5535c4b0ab4SDavid Hildenbrand         const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i);        \
5545c4b0ab4SDavid Hildenbrand         const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask);   \
5555c4b0ab4SDavid Hildenbrand                                                                                \
5565c4b0ab4SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, d);                                \
5575c4b0ab4SDavid Hildenbrand     }                                                                          \
5585c4b0ab4SDavid Hildenbrand }
5595c4b0ab4SDavid Hildenbrand DEF_VERIM(8)
5605c4b0ab4SDavid Hildenbrand DEF_VERIM(16)
561dea33fc3SDavid Hildenbrand 
562dea33fc3SDavid Hildenbrand void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count,
563dea33fc3SDavid Hildenbrand                       uint32_t desc)
564dea33fc3SDavid Hildenbrand {
565dea33fc3SDavid Hildenbrand     s390_vec_shl(v1, v2, count);
566dea33fc3SDavid Hildenbrand }
5675f724887SDavid Hildenbrand 
568b7a50eb7SDavid Miller void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3,
569b7a50eb7SDavid Miller                           uint32_t desc)
570b7a50eb7SDavid Miller {
571b7a50eb7SDavid Miller     S390Vector tmp;
572b7a50eb7SDavid Miller     uint32_t sh, e0, e1 = 0;
573b7a50eb7SDavid Miller     int i;
574b7a50eb7SDavid Miller 
575b7a50eb7SDavid Miller     for (i = 15; i >= 0; --i, e1 = e0) {
576b7a50eb7SDavid Miller         e0 = s390_vec_read_element8(v2, i);
577b7a50eb7SDavid Miller         sh = s390_vec_read_element8(v3, i) & 7;
578b7a50eb7SDavid Miller 
579b7a50eb7SDavid Miller         s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh));
580b7a50eb7SDavid Miller     }
581b7a50eb7SDavid Miller 
582b7a50eb7SDavid Miller     *(S390Vector *)v1 = tmp;
583b7a50eb7SDavid Miller }
584b7a50eb7SDavid Miller 
5855f724887SDavid Hildenbrand void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count,
5865f724887SDavid Hildenbrand                        uint32_t desc)
5875f724887SDavid Hildenbrand {
5885f724887SDavid Hildenbrand     s390_vec_sar(v1, v2, count);
5895f724887SDavid Hildenbrand }
5908112274fSDavid Hildenbrand 
591b7a50eb7SDavid Miller void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3,
592b7a50eb7SDavid Miller                            uint32_t desc)
593b7a50eb7SDavid Miller {
594b7a50eb7SDavid Miller     S390Vector tmp;
595b7a50eb7SDavid Miller     uint32_t sh, e0, e1 = 0;
596b7a50eb7SDavid Miller     int i = 0;
597b7a50eb7SDavid Miller 
598b7a50eb7SDavid Miller     /* Byte 0 is special only. */
599b7a50eb7SDavid Miller     e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i);
600b7a50eb7SDavid Miller     sh = s390_vec_read_element8(v3, i) & 7;
601b7a50eb7SDavid Miller     s390_vec_write_element8(&tmp, i, e0 >> sh);
602b7a50eb7SDavid Miller 
603b7a50eb7SDavid Miller     e1 = e0;
604b7a50eb7SDavid Miller     for (i = 1; i < 16; ++i, e1 = e0) {
605b7a50eb7SDavid Miller         e0 = s390_vec_read_element8(v2, i);
606b7a50eb7SDavid Miller         sh = s390_vec_read_element8(v3, i) & 7;
607b7a50eb7SDavid Miller         s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh);
608b7a50eb7SDavid Miller     }
609b7a50eb7SDavid Miller 
610b7a50eb7SDavid Miller     *(S390Vector *)v1 = tmp;
611b7a50eb7SDavid Miller }
612b7a50eb7SDavid Miller 
6138112274fSDavid Hildenbrand void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count,
6148112274fSDavid Hildenbrand                        uint32_t desc)
6158112274fSDavid Hildenbrand {
6168112274fSDavid Hildenbrand     s390_vec_shr(v1, v2, count);
6178112274fSDavid Hildenbrand }
6181ee2d7baSDavid Hildenbrand 
619b7a50eb7SDavid Miller void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3,
620b7a50eb7SDavid Miller                            uint32_t desc)
621b7a50eb7SDavid Miller {
622b7a50eb7SDavid Miller     S390Vector tmp;
623b7a50eb7SDavid Miller     uint32_t sh, e0, e1 = 0;
624b7a50eb7SDavid Miller 
625b7a50eb7SDavid Miller     for (int i = 0; i < 16; ++i, e1 = e0) {
626b7a50eb7SDavid Miller         e0 = s390_vec_read_element8(v2, i);
627b7a50eb7SDavid Miller         sh = s390_vec_read_element8(v3, i) & 7;
628b7a50eb7SDavid Miller 
629b7a50eb7SDavid Miller         s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh);
630b7a50eb7SDavid Miller     }
631b7a50eb7SDavid Miller 
632b7a50eb7SDavid Miller     *(S390Vector *)v1 = tmp;
633b7a50eb7SDavid Miller }
634b7a50eb7SDavid Miller 
6351ee2d7baSDavid Hildenbrand #define DEF_VSCBI(BITS)                                                        \
6361ee2d7baSDavid Hildenbrand void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3,        \
6371ee2d7baSDavid Hildenbrand                               uint32_t desc)                                   \
6381ee2d7baSDavid Hildenbrand {                                                                              \
6391ee2d7baSDavid Hildenbrand     int i;                                                                     \
6401ee2d7baSDavid Hildenbrand                                                                                \
6411ee2d7baSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
6421ee2d7baSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
6431ee2d7baSDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
6441ee2d7baSDavid Hildenbrand                                                                                \
64523e79774SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, a >= b);                           \
6461ee2d7baSDavid Hildenbrand     }                                                                          \
6471ee2d7baSDavid Hildenbrand }
6481ee2d7baSDavid Hildenbrand DEF_VSCBI(8)
6491ee2d7baSDavid Hildenbrand DEF_VSCBI(16)
650db156ebfSDavid Hildenbrand 
651db156ebfSDavid Hildenbrand void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env,
652db156ebfSDavid Hildenbrand                       uint32_t desc)
653db156ebfSDavid Hildenbrand {
654db156ebfSDavid Hildenbrand     S390Vector tmp;
655db156ebfSDavid Hildenbrand 
656db156ebfSDavid Hildenbrand     s390_vec_and(&tmp, v1, v2);
657db156ebfSDavid Hildenbrand     if (s390_vec_is_zero(&tmp)) {
658db156ebfSDavid Hildenbrand         /* Selected bits all zeros; or all mask bits zero */
659db156ebfSDavid Hildenbrand         env->cc_op = 0;
660db156ebfSDavid Hildenbrand     } else if (s390_vec_equal(&tmp, v2)) {
661db156ebfSDavid Hildenbrand         /* Selected bits all ones */
662db156ebfSDavid Hildenbrand         env->cc_op = 3;
663db156ebfSDavid Hildenbrand     } else {
664db156ebfSDavid Hildenbrand         /* Selected bits a mix of zeros and ones */
665db156ebfSDavid Hildenbrand         env->cc_op = 1;
666db156ebfSDavid Hildenbrand     }
667db156ebfSDavid Hildenbrand }
668