xref: /qemu/target/s390x/tcg/vec_int_helper.c (revision 25c304e93612b5a0ad75c55d43d596b6d3271aad)
1c1a81d4bSDavid Hildenbrand /*
2c1a81d4bSDavid Hildenbrand  * QEMU TCG support -- s390x vector integer instruction support
3c1a81d4bSDavid Hildenbrand  *
4c1a81d4bSDavid Hildenbrand  * Copyright (C) 2019 Red Hat Inc
5c1a81d4bSDavid Hildenbrand  *
6c1a81d4bSDavid Hildenbrand  * Authors:
7c1a81d4bSDavid Hildenbrand  *   David Hildenbrand <david@redhat.com>
8c1a81d4bSDavid Hildenbrand  *
9c1a81d4bSDavid Hildenbrand  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10c1a81d4bSDavid Hildenbrand  * See the COPYING file in the top-level directory.
11c1a81d4bSDavid Hildenbrand  */
12c1a81d4bSDavid Hildenbrand #include "qemu/osdep.h"
13c1a81d4bSDavid Hildenbrand #include "cpu.h"
14c1a81d4bSDavid Hildenbrand #include "vec.h"
15c1a81d4bSDavid Hildenbrand #include "exec/helper-proto.h"
165c4b0ab4SDavid Hildenbrand #include "tcg/tcg-gvec-desc.h"
172d8bc681SRichard Henderson #include "crypto/clmul.h"
18c1a81d4bSDavid Hildenbrand 
19697a45d6SDavid Hildenbrand static bool s390_vec_is_zero(const S390Vector *v)
20697a45d6SDavid Hildenbrand {
21697a45d6SDavid Hildenbrand     return !v->doubleword[0] && !v->doubleword[1];
22697a45d6SDavid Hildenbrand }
23697a45d6SDavid Hildenbrand 
24697a45d6SDavid Hildenbrand static void s390_vec_xor(S390Vector *res, const S390Vector *a,
25697a45d6SDavid Hildenbrand                          const S390Vector *b)
26697a45d6SDavid Hildenbrand {
27697a45d6SDavid Hildenbrand     res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0];
28697a45d6SDavid Hildenbrand     res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1];
29697a45d6SDavid Hildenbrand }
30697a45d6SDavid Hildenbrand 
31db156ebfSDavid Hildenbrand static void s390_vec_and(S390Vector *res, const S390Vector *a,
32db156ebfSDavid Hildenbrand                          const S390Vector *b)
33db156ebfSDavid Hildenbrand {
34db156ebfSDavid Hildenbrand     res->doubleword[0] = a->doubleword[0] & b->doubleword[0];
35db156ebfSDavid Hildenbrand     res->doubleword[1] = a->doubleword[1] & b->doubleword[1];
36db156ebfSDavid Hildenbrand }
37db156ebfSDavid Hildenbrand 
38db156ebfSDavid Hildenbrand static bool s390_vec_equal(const S390Vector *a, const S390Vector *b)
39db156ebfSDavid Hildenbrand {
40db156ebfSDavid Hildenbrand     return a->doubleword[0] == b->doubleword[0] &&
41db156ebfSDavid Hildenbrand            a->doubleword[1] == b->doubleword[1];
42db156ebfSDavid Hildenbrand }
43db156ebfSDavid Hildenbrand 
44697a45d6SDavid Hildenbrand static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
45697a45d6SDavid Hildenbrand {
46697a45d6SDavid Hildenbrand     uint64_t tmp;
47697a45d6SDavid Hildenbrand 
48697a45d6SDavid Hildenbrand     g_assert(count < 128);
49697a45d6SDavid Hildenbrand     if (count == 0) {
50697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[0];
51697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[1];
52697a45d6SDavid Hildenbrand     } else if (count == 64) {
53697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[1];
54697a45d6SDavid Hildenbrand         d->doubleword[1] = 0;
55697a45d6SDavid Hildenbrand     } else if (count < 64) {
56697a45d6SDavid Hildenbrand         tmp = extract64(a->doubleword[1], 64 - count, count);
57697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[1] << count;
58697a45d6SDavid Hildenbrand         d->doubleword[0] = (a->doubleword[0] << count) | tmp;
59697a45d6SDavid Hildenbrand     } else {
60697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[1] << (count - 64);
61697a45d6SDavid Hildenbrand         d->doubleword[1] = 0;
62697a45d6SDavid Hildenbrand     }
63697a45d6SDavid Hildenbrand }
64697a45d6SDavid Hildenbrand 
655f724887SDavid Hildenbrand static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count)
665f724887SDavid Hildenbrand {
675f724887SDavid Hildenbrand     uint64_t tmp;
685f724887SDavid Hildenbrand 
695f724887SDavid Hildenbrand     if (count == 0) {
705f724887SDavid Hildenbrand         d->doubleword[0] = a->doubleword[0];
715f724887SDavid Hildenbrand         d->doubleword[1] = a->doubleword[1];
725f724887SDavid Hildenbrand     } else if (count == 64) {
73b57b3368SDavid Hildenbrand         tmp = (int64_t)a->doubleword[0] >> 63;
745f724887SDavid Hildenbrand         d->doubleword[1] = a->doubleword[0];
75b57b3368SDavid Hildenbrand         d->doubleword[0] = tmp;
765f724887SDavid Hildenbrand     } else if (count < 64) {
775f724887SDavid Hildenbrand         tmp = a->doubleword[1] >> count;
785f724887SDavid Hildenbrand         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
795f724887SDavid Hildenbrand         d->doubleword[0] = (int64_t)a->doubleword[0] >> count;
805f724887SDavid Hildenbrand     } else {
81b57b3368SDavid Hildenbrand         tmp = (int64_t)a->doubleword[0] >> 63;
825f724887SDavid Hildenbrand         d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64);
83b57b3368SDavid Hildenbrand         d->doubleword[0] = tmp;
845f724887SDavid Hildenbrand     }
855f724887SDavid Hildenbrand }
865f724887SDavid Hildenbrand 
87697a45d6SDavid Hildenbrand static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
88697a45d6SDavid Hildenbrand {
89697a45d6SDavid Hildenbrand     uint64_t tmp;
90697a45d6SDavid Hildenbrand 
91697a45d6SDavid Hildenbrand     g_assert(count < 128);
92697a45d6SDavid Hildenbrand     if (count == 0) {
93697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[0];
94697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[1];
95697a45d6SDavid Hildenbrand     } else if (count == 64) {
96697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[0];
97697a45d6SDavid Hildenbrand         d->doubleword[0] = 0;
98697a45d6SDavid Hildenbrand     } else if (count < 64) {
99697a45d6SDavid Hildenbrand         tmp = a->doubleword[1] >> count;
100697a45d6SDavid Hildenbrand         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
101697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[0] >> count;
102697a45d6SDavid Hildenbrand     } else {
103697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[0] >> (count - 64);
104697a45d6SDavid Hildenbrand         d->doubleword[0] = 0;
105697a45d6SDavid Hildenbrand     }
106697a45d6SDavid Hildenbrand }
107c1a81d4bSDavid Hildenbrand #define DEF_VAVG(BITS)                                                         \
108c1a81d4bSDavid Hildenbrand void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3,         \
109c1a81d4bSDavid Hildenbrand                              uint32_t desc)                                    \
110c1a81d4bSDavid Hildenbrand {                                                                              \
111c1a81d4bSDavid Hildenbrand     int i;                                                                     \
112c1a81d4bSDavid Hildenbrand                                                                                \
113c1a81d4bSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
114c1a81d4bSDavid Hildenbrand         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
115c1a81d4bSDavid Hildenbrand         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
116c1a81d4bSDavid Hildenbrand                                                                                \
117c1a81d4bSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
118c1a81d4bSDavid Hildenbrand     }                                                                          \
119c1a81d4bSDavid Hildenbrand }
120c1a81d4bSDavid Hildenbrand DEF_VAVG(8)
121c1a81d4bSDavid Hildenbrand DEF_VAVG(16)
122801aa78bSDavid Hildenbrand 
123801aa78bSDavid Hildenbrand #define DEF_VAVGL(BITS)                                                        \
124801aa78bSDavid Hildenbrand void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3,        \
125801aa78bSDavid Hildenbrand                               uint32_t desc)                                   \
126801aa78bSDavid Hildenbrand {                                                                              \
127801aa78bSDavid Hildenbrand     int i;                                                                     \
128801aa78bSDavid Hildenbrand                                                                                \
129801aa78bSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
130801aa78bSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
131801aa78bSDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
132801aa78bSDavid Hildenbrand                                                                                \
133801aa78bSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
134801aa78bSDavid Hildenbrand     }                                                                          \
135801aa78bSDavid Hildenbrand }
136801aa78bSDavid Hildenbrand DEF_VAVGL(8)
137801aa78bSDavid Hildenbrand DEF_VAVGL(16)
13828863f1dSDavid Hildenbrand 
13928863f1dSDavid Hildenbrand #define DEF_VCLZ(BITS)                                                         \
14028863f1dSDavid Hildenbrand void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc)          \
14128863f1dSDavid Hildenbrand {                                                                              \
14228863f1dSDavid Hildenbrand     int i;                                                                     \
14328863f1dSDavid Hildenbrand                                                                                \
14428863f1dSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
14528863f1dSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
14628863f1dSDavid Hildenbrand                                                                                \
14728863f1dSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS);             \
14828863f1dSDavid Hildenbrand     }                                                                          \
14928863f1dSDavid Hildenbrand }
15028863f1dSDavid Hildenbrand DEF_VCLZ(8)
15128863f1dSDavid Hildenbrand DEF_VCLZ(16)
152449a8ac2SDavid Hildenbrand 
153449a8ac2SDavid Hildenbrand #define DEF_VCTZ(BITS)                                                         \
154449a8ac2SDavid Hildenbrand void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc)          \
155449a8ac2SDavid Hildenbrand {                                                                              \
156449a8ac2SDavid Hildenbrand     int i;                                                                     \
157449a8ac2SDavid Hildenbrand                                                                                \
158449a8ac2SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
159449a8ac2SDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
160449a8ac2SDavid Hildenbrand                                                                                \
161449a8ac2SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS);              \
162449a8ac2SDavid Hildenbrand     }                                                                          \
163449a8ac2SDavid Hildenbrand }
164449a8ac2SDavid Hildenbrand DEF_VCTZ(8)
165449a8ac2SDavid Hildenbrand DEF_VCTZ(16)
166697a45d6SDavid Hildenbrand 
/*
 * Carry-less multiplication: works like binary long multiplication, except
 * that the partial products are combined with XOR instead of addition.
 *
 * DEF_GALOIS_MULTIPLY(BITS, TBITS) defines galois_multiply<BITS>(), which
 * takes and returns TBITS-wide values.
 */
#define DEF_GALOIS_MULTIPLY(BITS, TBITS)                                       \
static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a,                \
                                             uint##TBITS##_t b)                \
{                                                                              \
    uint##TBITS##_t product = 0;                                               \
                                                                               \
    /* Walk the bits of b from least to most significant. */                   \
    for (; b != 0; a <<= 1, b >>= 1) {                                         \
        if ((b & 0x1) != 0) {                                                  \
            product ^= a;                                                      \
        }                                                                      \
    }                                                                          \
    return product;                                                            \
}
DEF_GALOIS_MULTIPLY(32, 64)
184697a45d6SDavid Hildenbrand 
185697a45d6SDavid Hildenbrand static S390Vector galois_multiply64(uint64_t a, uint64_t b)
186697a45d6SDavid Hildenbrand {
187697a45d6SDavid Hildenbrand     S390Vector res = {};
188697a45d6SDavid Hildenbrand     S390Vector va = {
189697a45d6SDavid Hildenbrand         .doubleword[1] = a,
190697a45d6SDavid Hildenbrand     };
191697a45d6SDavid Hildenbrand     S390Vector vb = {
192697a45d6SDavid Hildenbrand         .doubleword[1] = b,
193697a45d6SDavid Hildenbrand     };
194697a45d6SDavid Hildenbrand 
195697a45d6SDavid Hildenbrand     while (!s390_vec_is_zero(&vb)) {
196697a45d6SDavid Hildenbrand         if (vb.doubleword[1] & 0x1) {
197697a45d6SDavid Hildenbrand             s390_vec_xor(&res, &res, &va);
198697a45d6SDavid Hildenbrand         }
199697a45d6SDavid Hildenbrand         s390_vec_shl(&va, &va, 1);
200697a45d6SDavid Hildenbrand         s390_vec_shr(&vb, &vb, 1);
201697a45d6SDavid Hildenbrand     }
202697a45d6SDavid Hildenbrand     return res;
203697a45d6SDavid Hildenbrand }
204697a45d6SDavid Hildenbrand 
/*
 * The two doublewords are processed independently (nothing carries from
 * one into the other), so the order in which they are handled is
 * irrelevant.  Registers also never overlap partially.
 *
 * Returns the XOR of the even and odd 8x4 carry-less products of @n and @m
 * with the accumulator @a.
 */
static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_8x4_even(n, m);
    const uint64_t odd = clmul_8x4_odd(n, m);

    return even ^ odd ^ a;
}
2132d8bc681SRichard Henderson 
2142d8bc681SRichard Henderson void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d)
2152d8bc681SRichard Henderson {
2162d8bc681SRichard Henderson     uint64_t *q1 = v1;
2172d8bc681SRichard Henderson     const uint64_t *q2 = v2, *q3 = v3;
2182d8bc681SRichard Henderson 
2192d8bc681SRichard Henderson     q1[0] = do_gfma8(q2[0], q3[0], 0);
2202d8bc681SRichard Henderson     q1[1] = do_gfma8(q2[1], q3[1], 0);
2212d8bc681SRichard Henderson }
2222d8bc681SRichard Henderson 
2232d8bc681SRichard Henderson void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3,
2242d8bc681SRichard Henderson                          const void *v4, uint32_t desc)
2252d8bc681SRichard Henderson {
2262d8bc681SRichard Henderson     uint64_t *q1 = v1;
2272d8bc681SRichard Henderson     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
2282d8bc681SRichard Henderson 
2292d8bc681SRichard Henderson     q1[0] = do_gfma8(q2[0], q3[0], q4[0]);
2302d8bc681SRichard Henderson     q1[1] = do_gfma8(q2[1], q3[1], q4[1]);
2312d8bc681SRichard Henderson }
2322d8bc681SRichard Henderson 
/*
 * Returns the XOR of the even and odd 16x2 carry-less products of @n and
 * @m with the accumulator @a.
 */
static inline uint64_t do_gfma16(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_16x2_even(n, m);
    const uint64_t odd = clmul_16x2_odd(n, m);

    return even ^ odd ^ a;
}
237*25c304e9SRichard Henderson 
238*25c304e9SRichard Henderson void HELPER(gvec_vgfm16)(void *v1, const void *v2, const void *v3, uint32_t d)
239*25c304e9SRichard Henderson {
240*25c304e9SRichard Henderson     uint64_t *q1 = v1;
241*25c304e9SRichard Henderson     const uint64_t *q2 = v2, *q3 = v3;
242*25c304e9SRichard Henderson 
243*25c304e9SRichard Henderson     q1[0] = do_gfma16(q2[0], q3[0], 0);
244*25c304e9SRichard Henderson     q1[1] = do_gfma16(q2[1], q3[1], 0);
245*25c304e9SRichard Henderson }
246*25c304e9SRichard Henderson 
247*25c304e9SRichard Henderson void HELPER(gvec_vgfma16)(void *v1, const void *v2, const void *v3,
248*25c304e9SRichard Henderson                          const void *v4, uint32_t d)
249*25c304e9SRichard Henderson {
250*25c304e9SRichard Henderson     uint64_t *q1 = v1;
251*25c304e9SRichard Henderson     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
252*25c304e9SRichard Henderson 
253*25c304e9SRichard Henderson     q1[0] = do_gfma16(q2[0], q3[0], q4[0]);
254*25c304e9SRichard Henderson     q1[1] = do_gfma16(q2[1], q3[1], q4[1]);
255*25c304e9SRichard Henderson }
256*25c304e9SRichard Henderson 
257697a45d6SDavid Hildenbrand #define DEF_VGFM(BITS, TBITS)                                                  \
258697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3,         \
259697a45d6SDavid Hildenbrand                              uint32_t desc)                                    \
260697a45d6SDavid Hildenbrand {                                                                              \
261697a45d6SDavid Hildenbrand     int i;                                                                     \
262697a45d6SDavid Hildenbrand                                                                                \
263697a45d6SDavid Hildenbrand     for (i = 0; i < (128 / TBITS); i++) {                                      \
264697a45d6SDavid Hildenbrand         uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
265697a45d6SDavid Hildenbrand         uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
266697a45d6SDavid Hildenbrand         uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
267697a45d6SDavid Hildenbrand                                                                                \
268697a45d6SDavid Hildenbrand         a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
269697a45d6SDavid Hildenbrand         b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
270697a45d6SDavid Hildenbrand         d = d ^ galois_multiply32(a, b);                                       \
271697a45d6SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, d);                               \
272697a45d6SDavid Hildenbrand     }                                                                          \
273697a45d6SDavid Hildenbrand }
274697a45d6SDavid Hildenbrand DEF_VGFM(32, 64)
275697a45d6SDavid Hildenbrand 
276697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
277697a45d6SDavid Hildenbrand                          uint32_t desc)
278697a45d6SDavid Hildenbrand {
279697a45d6SDavid Hildenbrand     S390Vector tmp1, tmp2;
280697a45d6SDavid Hildenbrand     uint64_t a, b;
281697a45d6SDavid Hildenbrand 
282697a45d6SDavid Hildenbrand     a = s390_vec_read_element64(v2, 0);
283697a45d6SDavid Hildenbrand     b = s390_vec_read_element64(v3, 0);
284697a45d6SDavid Hildenbrand     tmp1 = galois_multiply64(a, b);
285697a45d6SDavid Hildenbrand     a = s390_vec_read_element64(v2, 1);
286697a45d6SDavid Hildenbrand     b = s390_vec_read_element64(v3, 1);
287697a45d6SDavid Hildenbrand     tmp2 = galois_multiply64(a, b);
288697a45d6SDavid Hildenbrand     s390_vec_xor(v1, &tmp1, &tmp2);
289697a45d6SDavid Hildenbrand }
290697a45d6SDavid Hildenbrand 
291697a45d6SDavid Hildenbrand #define DEF_VGFMA(BITS, TBITS)                                                 \
292697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3,        \
293697a45d6SDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
294697a45d6SDavid Hildenbrand {                                                                              \
295697a45d6SDavid Hildenbrand     int i;                                                                     \
296697a45d6SDavid Hildenbrand                                                                                \
297697a45d6SDavid Hildenbrand     for (i = 0; i < (128 / TBITS); i++) {                                      \
298697a45d6SDavid Hildenbrand         uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
299697a45d6SDavid Hildenbrand         uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
300697a45d6SDavid Hildenbrand         uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
301697a45d6SDavid Hildenbrand                                                                                \
302697a45d6SDavid Hildenbrand         a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
303697a45d6SDavid Hildenbrand         b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
304697a45d6SDavid Hildenbrand         d = d ^ galois_multiply32(a, b);                                       \
305697a45d6SDavid Hildenbrand         d = d ^ s390_vec_read_element##TBITS(v4, i);                           \
306697a45d6SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, d);                               \
307697a45d6SDavid Hildenbrand     }                                                                          \
308697a45d6SDavid Hildenbrand }
309697a45d6SDavid Hildenbrand DEF_VGFMA(32, 64)
310697a45d6SDavid Hildenbrand 
311697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
312697a45d6SDavid Hildenbrand                           const void *v4, uint32_t desc)
313697a45d6SDavid Hildenbrand {
314697a45d6SDavid Hildenbrand     S390Vector tmp1, tmp2;
315697a45d6SDavid Hildenbrand     uint64_t a, b;
316697a45d6SDavid Hildenbrand 
317697a45d6SDavid Hildenbrand     a = s390_vec_read_element64(v2, 0);
318697a45d6SDavid Hildenbrand     b = s390_vec_read_element64(v3, 0);
319697a45d6SDavid Hildenbrand     tmp1 = galois_multiply64(a, b);
320697a45d6SDavid Hildenbrand     a = s390_vec_read_element64(v2, 1);
321697a45d6SDavid Hildenbrand     b = s390_vec_read_element64(v3, 1);
322697a45d6SDavid Hildenbrand     tmp2 = galois_multiply64(a, b);
323697a45d6SDavid Hildenbrand     s390_vec_xor(&tmp1, &tmp1, &tmp2);
324697a45d6SDavid Hildenbrand     s390_vec_xor(v1, &tmp1, v4);
325697a45d6SDavid Hildenbrand }
3261b430aecSDavid Hildenbrand 
3271b430aecSDavid Hildenbrand #define DEF_VMAL(BITS)                                                         \
3281b430aecSDavid Hildenbrand void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3,         \
3291b430aecSDavid Hildenbrand                              const void *v4, uint32_t desc)                    \
3301b430aecSDavid Hildenbrand {                                                                              \
3311b430aecSDavid Hildenbrand     int i;                                                                     \
3321b430aecSDavid Hildenbrand                                                                                \
3331b430aecSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
3341b430aecSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
3351b430aecSDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
3361b430aecSDavid Hildenbrand         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
3371b430aecSDavid Hildenbrand                                                                                \
3381b430aecSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, a * b + c);                        \
3391b430aecSDavid Hildenbrand     }                                                                          \
3401b430aecSDavid Hildenbrand }
3411b430aecSDavid Hildenbrand DEF_VMAL(8)
3421b430aecSDavid Hildenbrand DEF_VMAL(16)
3431b430aecSDavid Hildenbrand 
3441b430aecSDavid Hildenbrand #define DEF_VMAH(BITS)                                                         \
3451b430aecSDavid Hildenbrand void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3,         \
3461b430aecSDavid Hildenbrand                              const void *v4, uint32_t desc)                    \
3471b430aecSDavid Hildenbrand {                                                                              \
3481b430aecSDavid Hildenbrand     int i;                                                                     \
3491b430aecSDavid Hildenbrand                                                                                \
3501b430aecSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
3511b430aecSDavid Hildenbrand         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
3521b430aecSDavid Hildenbrand         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
3531b430aecSDavid Hildenbrand         const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i);   \
3541b430aecSDavid Hildenbrand                                                                                \
3551b430aecSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
3561b430aecSDavid Hildenbrand     }                                                                          \
3571b430aecSDavid Hildenbrand }
3581b430aecSDavid Hildenbrand DEF_VMAH(8)
3591b430aecSDavid Hildenbrand DEF_VMAH(16)
3601b430aecSDavid Hildenbrand 
3611b430aecSDavid Hildenbrand #define DEF_VMALH(BITS)                                                        \
3621b430aecSDavid Hildenbrand void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3,        \
3631b430aecSDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
3641b430aecSDavid Hildenbrand {                                                                              \
3651b430aecSDavid Hildenbrand     int i;                                                                     \
3661b430aecSDavid Hildenbrand                                                                                \
3671b430aecSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
3681b430aecSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
3691b430aecSDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
3701b430aecSDavid Hildenbrand         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
3711b430aecSDavid Hildenbrand                                                                                \
3721b430aecSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
3731b430aecSDavid Hildenbrand     }                                                                          \
3741b430aecSDavid Hildenbrand }
3751b430aecSDavid Hildenbrand DEF_VMALH(8)
3761b430aecSDavid Hildenbrand DEF_VMALH(16)
3771b430aecSDavid Hildenbrand 
3781b430aecSDavid Hildenbrand #define DEF_VMAE(BITS, TBITS)                                                  \
3791b430aecSDavid Hildenbrand void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3,         \
3801b430aecSDavid Hildenbrand                              const void *v4, uint32_t desc)                    \
3811b430aecSDavid Hildenbrand {                                                                              \
3821b430aecSDavid Hildenbrand     int i, j;                                                                  \
3831b430aecSDavid Hildenbrand                                                                                \
3841b430aecSDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
3851b430aecSDavid Hildenbrand         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
3861b430aecSDavid Hildenbrand         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
3878b952519SDavid Hildenbrand         int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
3881b430aecSDavid Hildenbrand                                                                                \
3891b430aecSDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
3901b430aecSDavid Hildenbrand     }                                                                          \
3911b430aecSDavid Hildenbrand }
3921b430aecSDavid Hildenbrand DEF_VMAE(8, 16)
3931b430aecSDavid Hildenbrand DEF_VMAE(16, 32)
3941b430aecSDavid Hildenbrand DEF_VMAE(32, 64)
3951b430aecSDavid Hildenbrand 
3961b430aecSDavid Hildenbrand #define DEF_VMALE(BITS, TBITS)                                                 \
3971b430aecSDavid Hildenbrand void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3,        \
3981b430aecSDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
3991b430aecSDavid Hildenbrand {                                                                              \
4001b430aecSDavid Hildenbrand     int i, j;                                                                  \
4011b430aecSDavid Hildenbrand                                                                                \
4021b430aecSDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
4031b430aecSDavid Hildenbrand         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
4041b430aecSDavid Hildenbrand         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
4058b952519SDavid Hildenbrand         uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
4061b430aecSDavid Hildenbrand                                                                                \
4071b430aecSDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
4081b430aecSDavid Hildenbrand     }                                                                          \
4091b430aecSDavid Hildenbrand }
4101b430aecSDavid Hildenbrand DEF_VMALE(8, 16)
4111b430aecSDavid Hildenbrand DEF_VMALE(16, 32)
4121b430aecSDavid Hildenbrand DEF_VMALE(32, 64)
4131b430aecSDavid Hildenbrand 
4141b430aecSDavid Hildenbrand #define DEF_VMAO(BITS, TBITS)                                                  \
4151b430aecSDavid Hildenbrand void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3,         \
4161b430aecSDavid Hildenbrand                              const void *v4, uint32_t desc)                    \
4171b430aecSDavid Hildenbrand {                                                                              \
4181b430aecSDavid Hildenbrand     int i, j;                                                                  \
4191b430aecSDavid Hildenbrand                                                                                \
4201b430aecSDavid Hildenbrand     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
4211b430aecSDavid Hildenbrand         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
4221b430aecSDavid Hildenbrand         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
4238b952519SDavid Hildenbrand         int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
4241b430aecSDavid Hildenbrand                                                                                \
4251b430aecSDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
4261b430aecSDavid Hildenbrand     }                                                                          \
4271b430aecSDavid Hildenbrand }
4281b430aecSDavid Hildenbrand DEF_VMAO(8, 16)
4291b430aecSDavid Hildenbrand DEF_VMAO(16, 32)
4301b430aecSDavid Hildenbrand DEF_VMAO(32, 64)
4311b430aecSDavid Hildenbrand 
4321b430aecSDavid Hildenbrand #define DEF_VMALO(BITS, TBITS)                                                 \
4331b430aecSDavid Hildenbrand void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3,        \
4341b430aecSDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
4351b430aecSDavid Hildenbrand {                                                                              \
4361b430aecSDavid Hildenbrand     int i, j;                                                                  \
4371b430aecSDavid Hildenbrand                                                                                \
4381b430aecSDavid Hildenbrand     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
4391b430aecSDavid Hildenbrand         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
4401b430aecSDavid Hildenbrand         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
4418b952519SDavid Hildenbrand         uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
4421b430aecSDavid Hildenbrand                                                                                \
4431b430aecSDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
4441b430aecSDavid Hildenbrand     }                                                                          \
4451b430aecSDavid Hildenbrand }
4461b430aecSDavid Hildenbrand DEF_VMALO(8, 16)
4471b430aecSDavid Hildenbrand DEF_VMALO(16, 32)
4481b430aecSDavid Hildenbrand DEF_VMALO(32, 64)
4492bf3ee38SDavid Hildenbrand 
4502bf3ee38SDavid Hildenbrand #define DEF_VMH(BITS)                                                          \
4512bf3ee38SDavid Hildenbrand void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3,          \
4522bf3ee38SDavid Hildenbrand                             uint32_t desc)                                     \
4532bf3ee38SDavid Hildenbrand {                                                                              \
4542bf3ee38SDavid Hildenbrand     int i;                                                                     \
4552bf3ee38SDavid Hildenbrand                                                                                \
4562bf3ee38SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
4572bf3ee38SDavid Hildenbrand         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
4582bf3ee38SDavid Hildenbrand         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
4592bf3ee38SDavid Hildenbrand                                                                                \
4602bf3ee38SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
4612bf3ee38SDavid Hildenbrand     }                                                                          \
4622bf3ee38SDavid Hildenbrand }
4632bf3ee38SDavid Hildenbrand DEF_VMH(8)
4642bf3ee38SDavid Hildenbrand DEF_VMH(16)
4652bf3ee38SDavid Hildenbrand 
4662bf3ee38SDavid Hildenbrand #define DEF_VMLH(BITS)                                                         \
4672bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3,         \
4682bf3ee38SDavid Hildenbrand                              uint32_t desc)                                    \
4692bf3ee38SDavid Hildenbrand {                                                                              \
4702bf3ee38SDavid Hildenbrand     int i;                                                                     \
4712bf3ee38SDavid Hildenbrand                                                                                \
4722bf3ee38SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
4732bf3ee38SDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
4742bf3ee38SDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
4752bf3ee38SDavid Hildenbrand                                                                                \
4762bf3ee38SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
4772bf3ee38SDavid Hildenbrand     }                                                                          \
4782bf3ee38SDavid Hildenbrand }
4792bf3ee38SDavid Hildenbrand DEF_VMLH(8)
4802bf3ee38SDavid Hildenbrand DEF_VMLH(16)
4812bf3ee38SDavid Hildenbrand 
4822bf3ee38SDavid Hildenbrand #define DEF_VME(BITS, TBITS)                                                   \
4832bf3ee38SDavid Hildenbrand void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3,          \
4842bf3ee38SDavid Hildenbrand                             uint32_t desc)                                     \
4852bf3ee38SDavid Hildenbrand {                                                                              \
4862bf3ee38SDavid Hildenbrand     int i, j;                                                                  \
4872bf3ee38SDavid Hildenbrand                                                                                \
4882bf3ee38SDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
4892bf3ee38SDavid Hildenbrand         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
4902bf3ee38SDavid Hildenbrand         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
4912bf3ee38SDavid Hildenbrand                                                                                \
4922bf3ee38SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b);                           \
4932bf3ee38SDavid Hildenbrand     }                                                                          \
4942bf3ee38SDavid Hildenbrand }
4952bf3ee38SDavid Hildenbrand DEF_VME(8, 16)
4962bf3ee38SDavid Hildenbrand DEF_VME(16, 32)
4972bf3ee38SDavid Hildenbrand DEF_VME(32, 64)
4982bf3ee38SDavid Hildenbrand 
4992bf3ee38SDavid Hildenbrand #define DEF_VMLE(BITS, TBITS)                                                  \
5002bf3ee38SDavid Hildenbrand void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3,         \
5012bf3ee38SDavid Hildenbrand                              uint32_t desc)                                    \
5022bf3ee38SDavid Hildenbrand {                                                                              \
5032bf3ee38SDavid Hildenbrand     int i, j;                                                                  \
5042bf3ee38SDavid Hildenbrand                                                                                \
5052bf3ee38SDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
5062bf3ee38SDavid Hildenbrand         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
5072bf3ee38SDavid Hildenbrand         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
5082bf3ee38SDavid Hildenbrand                                                                                \
5092bf3ee38SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b);                           \
5102bf3ee38SDavid Hildenbrand     }                                                                          \
5112bf3ee38SDavid Hildenbrand }
5122bf3ee38SDavid Hildenbrand DEF_VMLE(8, 16)
5132bf3ee38SDavid Hildenbrand DEF_VMLE(16, 32)
5142bf3ee38SDavid Hildenbrand DEF_VMLE(32, 64)
5152bf3ee38SDavid Hildenbrand 
5162bf3ee38SDavid Hildenbrand #define DEF_VMO(BITS, TBITS)                                                   \
5172bf3ee38SDavid Hildenbrand void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3,          \
5182bf3ee38SDavid Hildenbrand                             uint32_t desc)                                     \
5192bf3ee38SDavid Hildenbrand {                                                                              \
5202bf3ee38SDavid Hildenbrand     int i, j;                                                                  \
5212bf3ee38SDavid Hildenbrand                                                                                \
5222bf3ee38SDavid Hildenbrand     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
5232bf3ee38SDavid Hildenbrand         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
5242bf3ee38SDavid Hildenbrand         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
5252bf3ee38SDavid Hildenbrand                                                                                \
5262bf3ee38SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b);                           \
5272bf3ee38SDavid Hildenbrand     }                                                                          \
5282bf3ee38SDavid Hildenbrand }
5292bf3ee38SDavid Hildenbrand DEF_VMO(8, 16)
5302bf3ee38SDavid Hildenbrand DEF_VMO(16, 32)
5312bf3ee38SDavid Hildenbrand DEF_VMO(32, 64)
5322bf3ee38SDavid Hildenbrand 
5332bf3ee38SDavid Hildenbrand #define DEF_VMLO(BITS, TBITS)                                                  \
5342bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3,         \
5352bf3ee38SDavid Hildenbrand                              uint32_t desc)                                    \
5362bf3ee38SDavid Hildenbrand {                                                                              \
5372bf3ee38SDavid Hildenbrand     int i, j;                                                                  \
5382bf3ee38SDavid Hildenbrand                                                                                \
53949a7ce4eSDavid Hildenbrand     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
5402bf3ee38SDavid Hildenbrand         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
5412bf3ee38SDavid Hildenbrand         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
5422bf3ee38SDavid Hildenbrand                                                                                \
5432bf3ee38SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b);                           \
5442bf3ee38SDavid Hildenbrand     }                                                                          \
5452bf3ee38SDavid Hildenbrand }
5462bf3ee38SDavid Hildenbrand DEF_VMLO(8, 16)
5472bf3ee38SDavid Hildenbrand DEF_VMLO(16, 32)
5482bf3ee38SDavid Hildenbrand DEF_VMLO(32, 64)
549c3838aaaSDavid Hildenbrand 
550c3838aaaSDavid Hildenbrand #define DEF_VPOPCT(BITS)                                                       \
551c3838aaaSDavid Hildenbrand void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc)        \
552c3838aaaSDavid Hildenbrand {                                                                              \
553c3838aaaSDavid Hildenbrand     int i;                                                                     \
554c3838aaaSDavid Hildenbrand                                                                                \
555c3838aaaSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
556c3838aaaSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
557c3838aaaSDavid Hildenbrand                                                                                \
558c3838aaaSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, ctpop32(a));                       \
559c3838aaaSDavid Hildenbrand     }                                                                          \
560c3838aaaSDavid Hildenbrand }
561c3838aaaSDavid Hildenbrand DEF_VPOPCT(8)
562c3838aaaSDavid Hildenbrand DEF_VPOPCT(16)
56355236da2SDavid Hildenbrand 
5645c4b0ab4SDavid Hildenbrand #define DEF_VERIM(BITS)                                                        \
5655c4b0ab4SDavid Hildenbrand void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3,        \
5665c4b0ab4SDavid Hildenbrand                               uint32_t desc)                                   \
5675c4b0ab4SDavid Hildenbrand {                                                                              \
5685c4b0ab4SDavid Hildenbrand     const uint8_t count = simd_data(desc);                                     \
5695c4b0ab4SDavid Hildenbrand     int i;                                                                     \
5705c4b0ab4SDavid Hildenbrand                                                                                \
5715c4b0ab4SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
5725c4b0ab4SDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i);           \
5735c4b0ab4SDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i);           \
5745c4b0ab4SDavid Hildenbrand         const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i);        \
5755c4b0ab4SDavid Hildenbrand         const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask);   \
5765c4b0ab4SDavid Hildenbrand                                                                                \
5775c4b0ab4SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, d);                                \
5785c4b0ab4SDavid Hildenbrand     }                                                                          \
5795c4b0ab4SDavid Hildenbrand }
5805c4b0ab4SDavid Hildenbrand DEF_VERIM(8)
5815c4b0ab4SDavid Hildenbrand DEF_VERIM(16)
582dea33fc3SDavid Hildenbrand 
583dea33fc3SDavid Hildenbrand void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count,
584dea33fc3SDavid Hildenbrand                       uint32_t desc)
585dea33fc3SDavid Hildenbrand {
586dea33fc3SDavid Hildenbrand     s390_vec_shl(v1, v2, count);
587dea33fc3SDavid Hildenbrand }
5885f724887SDavid Hildenbrand 
589b7a50eb7SDavid Miller void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3,
590b7a50eb7SDavid Miller                           uint32_t desc)
591b7a50eb7SDavid Miller {
592b7a50eb7SDavid Miller     S390Vector tmp;
593b7a50eb7SDavid Miller     uint32_t sh, e0, e1 = 0;
594b7a50eb7SDavid Miller     int i;
595b7a50eb7SDavid Miller 
596b7a50eb7SDavid Miller     for (i = 15; i >= 0; --i, e1 = e0) {
597b7a50eb7SDavid Miller         e0 = s390_vec_read_element8(v2, i);
598b7a50eb7SDavid Miller         sh = s390_vec_read_element8(v3, i) & 7;
599b7a50eb7SDavid Miller 
600b7a50eb7SDavid Miller         s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh));
601b7a50eb7SDavid Miller     }
602b7a50eb7SDavid Miller 
603b7a50eb7SDavid Miller     *(S390Vector *)v1 = tmp;
604b7a50eb7SDavid Miller }
605b7a50eb7SDavid Miller 
6065f724887SDavid Hildenbrand void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count,
6075f724887SDavid Hildenbrand                        uint32_t desc)
6085f724887SDavid Hildenbrand {
6095f724887SDavid Hildenbrand     s390_vec_sar(v1, v2, count);
6105f724887SDavid Hildenbrand }
6118112274fSDavid Hildenbrand 
612b7a50eb7SDavid Miller void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3,
613b7a50eb7SDavid Miller                            uint32_t desc)
614b7a50eb7SDavid Miller {
615b7a50eb7SDavid Miller     S390Vector tmp;
616b7a50eb7SDavid Miller     uint32_t sh, e0, e1 = 0;
617b7a50eb7SDavid Miller     int i = 0;
618b7a50eb7SDavid Miller 
619b7a50eb7SDavid Miller     /* Byte 0 is special only. */
620b7a50eb7SDavid Miller     e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i);
621b7a50eb7SDavid Miller     sh = s390_vec_read_element8(v3, i) & 7;
622b7a50eb7SDavid Miller     s390_vec_write_element8(&tmp, i, e0 >> sh);
623b7a50eb7SDavid Miller 
624b7a50eb7SDavid Miller     e1 = e0;
625b7a50eb7SDavid Miller     for (i = 1; i < 16; ++i, e1 = e0) {
626b7a50eb7SDavid Miller         e0 = s390_vec_read_element8(v2, i);
627b7a50eb7SDavid Miller         sh = s390_vec_read_element8(v3, i) & 7;
628b7a50eb7SDavid Miller         s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh);
629b7a50eb7SDavid Miller     }
630b7a50eb7SDavid Miller 
631b7a50eb7SDavid Miller     *(S390Vector *)v1 = tmp;
632b7a50eb7SDavid Miller }
633b7a50eb7SDavid Miller 
6348112274fSDavid Hildenbrand void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count,
6358112274fSDavid Hildenbrand                        uint32_t desc)
6368112274fSDavid Hildenbrand {
6378112274fSDavid Hildenbrand     s390_vec_shr(v1, v2, count);
6388112274fSDavid Hildenbrand }
6391ee2d7baSDavid Hildenbrand 
640b7a50eb7SDavid Miller void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3,
641b7a50eb7SDavid Miller                            uint32_t desc)
642b7a50eb7SDavid Miller {
643b7a50eb7SDavid Miller     S390Vector tmp;
644b7a50eb7SDavid Miller     uint32_t sh, e0, e1 = 0;
645b7a50eb7SDavid Miller 
646b7a50eb7SDavid Miller     for (int i = 0; i < 16; ++i, e1 = e0) {
647b7a50eb7SDavid Miller         e0 = s390_vec_read_element8(v2, i);
648b7a50eb7SDavid Miller         sh = s390_vec_read_element8(v3, i) & 7;
649b7a50eb7SDavid Miller 
650b7a50eb7SDavid Miller         s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh);
651b7a50eb7SDavid Miller     }
652b7a50eb7SDavid Miller 
653b7a50eb7SDavid Miller     *(S390Vector *)v1 = tmp;
654b7a50eb7SDavid Miller }
655b7a50eb7SDavid Miller 
6561ee2d7baSDavid Hildenbrand #define DEF_VSCBI(BITS)                                                        \
6571ee2d7baSDavid Hildenbrand void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3,        \
6581ee2d7baSDavid Hildenbrand                               uint32_t desc)                                   \
6591ee2d7baSDavid Hildenbrand {                                                                              \
6601ee2d7baSDavid Hildenbrand     int i;                                                                     \
6611ee2d7baSDavid Hildenbrand                                                                                \
6621ee2d7baSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
6631ee2d7baSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
6641ee2d7baSDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
6651ee2d7baSDavid Hildenbrand                                                                                \
66623e79774SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, a >= b);                           \
6671ee2d7baSDavid Hildenbrand     }                                                                          \
6681ee2d7baSDavid Hildenbrand }
6691ee2d7baSDavid Hildenbrand DEF_VSCBI(8)
6701ee2d7baSDavid Hildenbrand DEF_VSCBI(16)
671db156ebfSDavid Hildenbrand 
672db156ebfSDavid Hildenbrand void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env,
673db156ebfSDavid Hildenbrand                       uint32_t desc)
674db156ebfSDavid Hildenbrand {
675db156ebfSDavid Hildenbrand     S390Vector tmp;
676db156ebfSDavid Hildenbrand 
677db156ebfSDavid Hildenbrand     s390_vec_and(&tmp, v1, v2);
678db156ebfSDavid Hildenbrand     if (s390_vec_is_zero(&tmp)) {
679db156ebfSDavid Hildenbrand         /* Selected bits all zeros; or all mask bits zero */
680db156ebfSDavid Hildenbrand         env->cc_op = 0;
681db156ebfSDavid Hildenbrand     } else if (s390_vec_equal(&tmp, v2)) {
682db156ebfSDavid Hildenbrand         /* Selected bits all ones */
683db156ebfSDavid Hildenbrand         env->cc_op = 3;
684db156ebfSDavid Hildenbrand     } else {
685db156ebfSDavid Hildenbrand         /* Selected bits a mix of zeros and ones */
686db156ebfSDavid Hildenbrand         env->cc_op = 1;
687db156ebfSDavid Hildenbrand     }
688db156ebfSDavid Hildenbrand }
689