xref: /qemu/target/s390x/tcg/vec_int_helper.c (revision 1ee2d7ba72f6539f6e5cab218c7ce789456d60f4)
/*
 * QEMU TCG support -- s390x vector integer instruction support
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
12c1a81d4bSDavid Hildenbrand #include "qemu/osdep.h"
13c1a81d4bSDavid Hildenbrand #include "qemu-common.h"
14c1a81d4bSDavid Hildenbrand #include "cpu.h"
15c1a81d4bSDavid Hildenbrand #include "vec.h"
16c1a81d4bSDavid Hildenbrand #include "exec/helper-proto.h"
175c4b0ab4SDavid Hildenbrand #include "tcg/tcg-gvec-desc.h"
18c1a81d4bSDavid Hildenbrand 
19697a45d6SDavid Hildenbrand static bool s390_vec_is_zero(const S390Vector *v)
20697a45d6SDavid Hildenbrand {
21697a45d6SDavid Hildenbrand     return !v->doubleword[0] && !v->doubleword[1];
22697a45d6SDavid Hildenbrand }
23697a45d6SDavid Hildenbrand 
24697a45d6SDavid Hildenbrand static void s390_vec_xor(S390Vector *res, const S390Vector *a,
25697a45d6SDavid Hildenbrand                          const S390Vector *b)
26697a45d6SDavid Hildenbrand {
27697a45d6SDavid Hildenbrand     res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0];
28697a45d6SDavid Hildenbrand     res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1];
29697a45d6SDavid Hildenbrand }
30697a45d6SDavid Hildenbrand 
31697a45d6SDavid Hildenbrand static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
32697a45d6SDavid Hildenbrand {
33697a45d6SDavid Hildenbrand     uint64_t tmp;
34697a45d6SDavid Hildenbrand 
35697a45d6SDavid Hildenbrand     g_assert(count < 128);
36697a45d6SDavid Hildenbrand     if (count == 0) {
37697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[0];
38697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[1];
39697a45d6SDavid Hildenbrand     } else if (count == 64) {
40697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[1];
41697a45d6SDavid Hildenbrand         d->doubleword[1] = 0;
42697a45d6SDavid Hildenbrand     } else if (count < 64) {
43697a45d6SDavid Hildenbrand         tmp = extract64(a->doubleword[1], 64 - count, count);
44697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[1] << count;
45697a45d6SDavid Hildenbrand         d->doubleword[0] = (a->doubleword[0] << count) | tmp;
46697a45d6SDavid Hildenbrand     } else {
47697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[1] << (count - 64);
48697a45d6SDavid Hildenbrand         d->doubleword[1] = 0;
49697a45d6SDavid Hildenbrand     }
50697a45d6SDavid Hildenbrand }
51697a45d6SDavid Hildenbrand 
525f724887SDavid Hildenbrand static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count)
535f724887SDavid Hildenbrand {
545f724887SDavid Hildenbrand     uint64_t tmp;
555f724887SDavid Hildenbrand 
565f724887SDavid Hildenbrand     if (count == 0) {
575f724887SDavid Hildenbrand         d->doubleword[0] = a->doubleword[0];
585f724887SDavid Hildenbrand         d->doubleword[1] = a->doubleword[1];
595f724887SDavid Hildenbrand     } else if (count == 64) {
605f724887SDavid Hildenbrand         d->doubleword[1] = a->doubleword[0];
615f724887SDavid Hildenbrand         d->doubleword[0] = 0;
625f724887SDavid Hildenbrand     } else if (count < 64) {
635f724887SDavid Hildenbrand         tmp = a->doubleword[1] >> count;
645f724887SDavid Hildenbrand         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
655f724887SDavid Hildenbrand         d->doubleword[0] = (int64_t)a->doubleword[0] >> count;
665f724887SDavid Hildenbrand     } else {
675f724887SDavid Hildenbrand         d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64);
685f724887SDavid Hildenbrand         d->doubleword[0] = 0;
695f724887SDavid Hildenbrand     }
705f724887SDavid Hildenbrand }
715f724887SDavid Hildenbrand 
72697a45d6SDavid Hildenbrand static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
73697a45d6SDavid Hildenbrand {
74697a45d6SDavid Hildenbrand     uint64_t tmp;
75697a45d6SDavid Hildenbrand 
76697a45d6SDavid Hildenbrand     g_assert(count < 128);
77697a45d6SDavid Hildenbrand     if (count == 0) {
78697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[0];
79697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[1];
80697a45d6SDavid Hildenbrand     } else if (count == 64) {
81697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[0];
82697a45d6SDavid Hildenbrand         d->doubleword[0] = 0;
83697a45d6SDavid Hildenbrand     } else if (count < 64) {
84697a45d6SDavid Hildenbrand         tmp = a->doubleword[1] >> count;
85697a45d6SDavid Hildenbrand         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
86697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[0] >> count;
87697a45d6SDavid Hildenbrand     } else {
88697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[0] >> (count - 64);
89697a45d6SDavid Hildenbrand         d->doubleword[0] = 0;
90697a45d6SDavid Hildenbrand     }
91697a45d6SDavid Hildenbrand }
92c1a81d4bSDavid Hildenbrand #define DEF_VAVG(BITS)                                                         \
93c1a81d4bSDavid Hildenbrand void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3,         \
94c1a81d4bSDavid Hildenbrand                              uint32_t desc)                                    \
95c1a81d4bSDavid Hildenbrand {                                                                              \
96c1a81d4bSDavid Hildenbrand     int i;                                                                     \
97c1a81d4bSDavid Hildenbrand                                                                                \
98c1a81d4bSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
99c1a81d4bSDavid Hildenbrand         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
100c1a81d4bSDavid Hildenbrand         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
101c1a81d4bSDavid Hildenbrand                                                                                \
102c1a81d4bSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
103c1a81d4bSDavid Hildenbrand     }                                                                          \
104c1a81d4bSDavid Hildenbrand }
105c1a81d4bSDavid Hildenbrand DEF_VAVG(8)
106c1a81d4bSDavid Hildenbrand DEF_VAVG(16)
107801aa78bSDavid Hildenbrand 
108801aa78bSDavid Hildenbrand #define DEF_VAVGL(BITS)                                                        \
109801aa78bSDavid Hildenbrand void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3,        \
110801aa78bSDavid Hildenbrand                               uint32_t desc)                                   \
111801aa78bSDavid Hildenbrand {                                                                              \
112801aa78bSDavid Hildenbrand     int i;                                                                     \
113801aa78bSDavid Hildenbrand                                                                                \
114801aa78bSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
115801aa78bSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
116801aa78bSDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
117801aa78bSDavid Hildenbrand                                                                                \
118801aa78bSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
119801aa78bSDavid Hildenbrand     }                                                                          \
120801aa78bSDavid Hildenbrand }
121801aa78bSDavid Hildenbrand DEF_VAVGL(8)
122801aa78bSDavid Hildenbrand DEF_VAVGL(16)
12328863f1dSDavid Hildenbrand 
12428863f1dSDavid Hildenbrand #define DEF_VCLZ(BITS)                                                         \
12528863f1dSDavid Hildenbrand void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc)          \
12628863f1dSDavid Hildenbrand {                                                                              \
12728863f1dSDavid Hildenbrand     int i;                                                                     \
12828863f1dSDavid Hildenbrand                                                                                \
12928863f1dSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
13028863f1dSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
13128863f1dSDavid Hildenbrand                                                                                \
13228863f1dSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS);             \
13328863f1dSDavid Hildenbrand     }                                                                          \
13428863f1dSDavid Hildenbrand }
13528863f1dSDavid Hildenbrand DEF_VCLZ(8)
13628863f1dSDavid Hildenbrand DEF_VCLZ(16)
137449a8ac2SDavid Hildenbrand 
138449a8ac2SDavid Hildenbrand #define DEF_VCTZ(BITS)                                                         \
139449a8ac2SDavid Hildenbrand void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc)          \
140449a8ac2SDavid Hildenbrand {                                                                              \
141449a8ac2SDavid Hildenbrand     int i;                                                                     \
142449a8ac2SDavid Hildenbrand                                                                                \
143449a8ac2SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
144449a8ac2SDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
145449a8ac2SDavid Hildenbrand                                                                                \
146449a8ac2SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS);              \
147449a8ac2SDavid Hildenbrand     }                                                                          \
148449a8ac2SDavid Hildenbrand }
149449a8ac2SDavid Hildenbrand DEF_VCTZ(8)
150449a8ac2SDavid Hildenbrand DEF_VCTZ(16)
151697a45d6SDavid Hildenbrand 
/*
 * Carry-less multiplication: like binary long multiplication, but the
 * partial products are combined with XOR instead of addition.
 *
 * Defines galois_multiply<BITS>(), which takes and returns TBITS-wide
 * values; the multiplier @b is consumed bit by bit from the least
 * significant end while @a is shifted left in step.
 */
#define DEF_GALOIS_MULTIPLY(BITS, TBITS) \
static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \
                                             uint##TBITS##_t b) \
{ \
    uint##TBITS##_t product = 0; \
\
    for (; b != 0; a <<= 1, b >>= 1) { \
        if (b & 0x1) { \
            product ^= a; \
        } \
    } \
    return product; \
}
DEF_GALOIS_MULTIPLY(8, 16)
DEF_GALOIS_MULTIPLY(16, 32)
DEF_GALOIS_MULTIPLY(32, 64)
171697a45d6SDavid Hildenbrand 
172697a45d6SDavid Hildenbrand static S390Vector galois_multiply64(uint64_t a, uint64_t b)
173697a45d6SDavid Hildenbrand {
174697a45d6SDavid Hildenbrand     S390Vector res = {};
175697a45d6SDavid Hildenbrand     S390Vector va = {
176697a45d6SDavid Hildenbrand         .doubleword[1] = a,
177697a45d6SDavid Hildenbrand     };
178697a45d6SDavid Hildenbrand     S390Vector vb = {
179697a45d6SDavid Hildenbrand         .doubleword[1] = b,
180697a45d6SDavid Hildenbrand     };
181697a45d6SDavid Hildenbrand 
182697a45d6SDavid Hildenbrand     while (!s390_vec_is_zero(&vb)) {
183697a45d6SDavid Hildenbrand         if (vb.doubleword[1] & 0x1) {
184697a45d6SDavid Hildenbrand             s390_vec_xor(&res, &res, &va);
185697a45d6SDavid Hildenbrand         }
186697a45d6SDavid Hildenbrand         s390_vec_shl(&va, &va, 1);
187697a45d6SDavid Hildenbrand         s390_vec_shr(&vb, &vb, 1);
188697a45d6SDavid Hildenbrand     }
189697a45d6SDavid Hildenbrand     return res;
190697a45d6SDavid Hildenbrand }
191697a45d6SDavid Hildenbrand 
192697a45d6SDavid Hildenbrand #define DEF_VGFM(BITS, TBITS)                                                  \
193697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3,         \
194697a45d6SDavid Hildenbrand                              uint32_t desc)                                    \
195697a45d6SDavid Hildenbrand {                                                                              \
196697a45d6SDavid Hildenbrand     int i;                                                                     \
197697a45d6SDavid Hildenbrand                                                                                \
198697a45d6SDavid Hildenbrand     for (i = 0; i < (128 / TBITS); i++) {                                      \
199697a45d6SDavid Hildenbrand         uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
200697a45d6SDavid Hildenbrand         uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
201697a45d6SDavid Hildenbrand         uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
202697a45d6SDavid Hildenbrand                                                                                \
203697a45d6SDavid Hildenbrand         a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
204697a45d6SDavid Hildenbrand         b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
205697a45d6SDavid Hildenbrand         d = d ^ galois_multiply32(a, b);                                       \
206697a45d6SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, d);                               \
207697a45d6SDavid Hildenbrand     }                                                                          \
208697a45d6SDavid Hildenbrand }
209697a45d6SDavid Hildenbrand DEF_VGFM(8, 16)
210697a45d6SDavid Hildenbrand DEF_VGFM(16, 32)
211697a45d6SDavid Hildenbrand DEF_VGFM(32, 64)
212697a45d6SDavid Hildenbrand 
213697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
214697a45d6SDavid Hildenbrand                          uint32_t desc)
215697a45d6SDavid Hildenbrand {
216697a45d6SDavid Hildenbrand     S390Vector tmp1, tmp2;
217697a45d6SDavid Hildenbrand     uint64_t a, b;
218697a45d6SDavid Hildenbrand 
219697a45d6SDavid Hildenbrand     a = s390_vec_read_element64(v2, 0);
220697a45d6SDavid Hildenbrand     b = s390_vec_read_element64(v3, 0);
221697a45d6SDavid Hildenbrand     tmp1 = galois_multiply64(a, b);
222697a45d6SDavid Hildenbrand     a = s390_vec_read_element64(v2, 1);
223697a45d6SDavid Hildenbrand     b = s390_vec_read_element64(v3, 1);
224697a45d6SDavid Hildenbrand     tmp2 = galois_multiply64(a, b);
225697a45d6SDavid Hildenbrand     s390_vec_xor(v1, &tmp1, &tmp2);
226697a45d6SDavid Hildenbrand }
227697a45d6SDavid Hildenbrand 
228697a45d6SDavid Hildenbrand #define DEF_VGFMA(BITS, TBITS)                                                 \
229697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3,        \
230697a45d6SDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
231697a45d6SDavid Hildenbrand {                                                                              \
232697a45d6SDavid Hildenbrand     int i;                                                                     \
233697a45d6SDavid Hildenbrand                                                                                \
234697a45d6SDavid Hildenbrand     for (i = 0; i < (128 / TBITS); i++) {                                      \
235697a45d6SDavid Hildenbrand         uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
236697a45d6SDavid Hildenbrand         uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
237697a45d6SDavid Hildenbrand         uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
238697a45d6SDavid Hildenbrand                                                                                \
239697a45d6SDavid Hildenbrand         a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
240697a45d6SDavid Hildenbrand         b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
241697a45d6SDavid Hildenbrand         d = d ^ galois_multiply32(a, b);                                       \
242697a45d6SDavid Hildenbrand         d = d ^ s390_vec_read_element##TBITS(v4, i);                           \
243697a45d6SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, d);                               \
244697a45d6SDavid Hildenbrand     }                                                                          \
245697a45d6SDavid Hildenbrand }
246697a45d6SDavid Hildenbrand DEF_VGFMA(8, 16)
247697a45d6SDavid Hildenbrand DEF_VGFMA(16, 32)
248697a45d6SDavid Hildenbrand DEF_VGFMA(32, 64)
249697a45d6SDavid Hildenbrand 
250697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
251697a45d6SDavid Hildenbrand                           const void *v4, uint32_t desc)
252697a45d6SDavid Hildenbrand {
253697a45d6SDavid Hildenbrand     S390Vector tmp1, tmp2;
254697a45d6SDavid Hildenbrand     uint64_t a, b;
255697a45d6SDavid Hildenbrand 
256697a45d6SDavid Hildenbrand     a = s390_vec_read_element64(v2, 0);
257697a45d6SDavid Hildenbrand     b = s390_vec_read_element64(v3, 0);
258697a45d6SDavid Hildenbrand     tmp1 = galois_multiply64(a, b);
259697a45d6SDavid Hildenbrand     a = s390_vec_read_element64(v2, 1);
260697a45d6SDavid Hildenbrand     b = s390_vec_read_element64(v3, 1);
261697a45d6SDavid Hildenbrand     tmp2 = galois_multiply64(a, b);
262697a45d6SDavid Hildenbrand     s390_vec_xor(&tmp1, &tmp1, &tmp2);
263697a45d6SDavid Hildenbrand     s390_vec_xor(v1, &tmp1, v4);
264697a45d6SDavid Hildenbrand }
2651b430aecSDavid Hildenbrand 
2661b430aecSDavid Hildenbrand #define DEF_VMAL(BITS)                                                         \
2671b430aecSDavid Hildenbrand void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3,         \
2681b430aecSDavid Hildenbrand                              const void *v4, uint32_t desc)                    \
2691b430aecSDavid Hildenbrand {                                                                              \
2701b430aecSDavid Hildenbrand     int i;                                                                     \
2711b430aecSDavid Hildenbrand                                                                                \
2721b430aecSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
2731b430aecSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
2741b430aecSDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
2751b430aecSDavid Hildenbrand         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
2761b430aecSDavid Hildenbrand                                                                                \
2771b430aecSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, a * b + c);                        \
2781b430aecSDavid Hildenbrand     }                                                                          \
2791b430aecSDavid Hildenbrand }
2801b430aecSDavid Hildenbrand DEF_VMAL(8)
2811b430aecSDavid Hildenbrand DEF_VMAL(16)
2821b430aecSDavid Hildenbrand 
2831b430aecSDavid Hildenbrand #define DEF_VMAH(BITS)                                                         \
2841b430aecSDavid Hildenbrand void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3,         \
2851b430aecSDavid Hildenbrand                              const void *v4, uint32_t desc)                    \
2861b430aecSDavid Hildenbrand {                                                                              \
2871b430aecSDavid Hildenbrand     int i;                                                                     \
2881b430aecSDavid Hildenbrand                                                                                \
2891b430aecSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
2901b430aecSDavid Hildenbrand         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
2911b430aecSDavid Hildenbrand         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
2921b430aecSDavid Hildenbrand         const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i);   \
2931b430aecSDavid Hildenbrand                                                                                \
2941b430aecSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
2951b430aecSDavid Hildenbrand     }                                                                          \
2961b430aecSDavid Hildenbrand }
2971b430aecSDavid Hildenbrand DEF_VMAH(8)
2981b430aecSDavid Hildenbrand DEF_VMAH(16)
2991b430aecSDavid Hildenbrand 
3001b430aecSDavid Hildenbrand #define DEF_VMALH(BITS)                                                        \
3011b430aecSDavid Hildenbrand void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3,        \
3021b430aecSDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
3031b430aecSDavid Hildenbrand {                                                                              \
3041b430aecSDavid Hildenbrand     int i;                                                                     \
3051b430aecSDavid Hildenbrand                                                                                \
3061b430aecSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
3071b430aecSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
3081b430aecSDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
3091b430aecSDavid Hildenbrand         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
3101b430aecSDavid Hildenbrand                                                                                \
3111b430aecSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
3121b430aecSDavid Hildenbrand     }                                                                          \
3131b430aecSDavid Hildenbrand }
3141b430aecSDavid Hildenbrand DEF_VMALH(8)
3151b430aecSDavid Hildenbrand DEF_VMALH(16)
3161b430aecSDavid Hildenbrand 
3171b430aecSDavid Hildenbrand #define DEF_VMAE(BITS, TBITS)                                                  \
3181b430aecSDavid Hildenbrand void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3,         \
3191b430aecSDavid Hildenbrand                              const void *v4, uint32_t desc)                    \
3201b430aecSDavid Hildenbrand {                                                                              \
3211b430aecSDavid Hildenbrand     int i, j;                                                                  \
3221b430aecSDavid Hildenbrand                                                                                \
3231b430aecSDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
3241b430aecSDavid Hildenbrand         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
3251b430aecSDavid Hildenbrand         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
3261b430aecSDavid Hildenbrand         int##TBITS##_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, j);  \
3271b430aecSDavid Hildenbrand                                                                                \
3281b430aecSDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
3291b430aecSDavid Hildenbrand     }                                                                          \
3301b430aecSDavid Hildenbrand }
3311b430aecSDavid Hildenbrand DEF_VMAE(8, 16)
3321b430aecSDavid Hildenbrand DEF_VMAE(16, 32)
3331b430aecSDavid Hildenbrand DEF_VMAE(32, 64)
3341b430aecSDavid Hildenbrand 
3351b430aecSDavid Hildenbrand #define DEF_VMALE(BITS, TBITS)                                                 \
3361b430aecSDavid Hildenbrand void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3,        \
3371b430aecSDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
3381b430aecSDavid Hildenbrand {                                                                              \
3391b430aecSDavid Hildenbrand     int i, j;                                                                  \
3401b430aecSDavid Hildenbrand                                                                                \
3411b430aecSDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
3421b430aecSDavid Hildenbrand         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
3431b430aecSDavid Hildenbrand         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
3441b430aecSDavid Hildenbrand         uint##TBITS##_t c = s390_vec_read_element##BITS(v4, j);                \
3451b430aecSDavid Hildenbrand                                                                                \
3461b430aecSDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
3471b430aecSDavid Hildenbrand     }                                                                          \
3481b430aecSDavid Hildenbrand }
3491b430aecSDavid Hildenbrand DEF_VMALE(8, 16)
3501b430aecSDavid Hildenbrand DEF_VMALE(16, 32)
3511b430aecSDavid Hildenbrand DEF_VMALE(32, 64)
3521b430aecSDavid Hildenbrand 
3531b430aecSDavid Hildenbrand #define DEF_VMAO(BITS, TBITS)                                                  \
3541b430aecSDavid Hildenbrand void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3,         \
3551b430aecSDavid Hildenbrand                              const void *v4, uint32_t desc)                    \
3561b430aecSDavid Hildenbrand {                                                                              \
3571b430aecSDavid Hildenbrand     int i, j;                                                                  \
3581b430aecSDavid Hildenbrand                                                                                \
3591b430aecSDavid Hildenbrand     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
3601b430aecSDavid Hildenbrand         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
3611b430aecSDavid Hildenbrand         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
3621b430aecSDavid Hildenbrand         int##TBITS##_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, j);  \
3631b430aecSDavid Hildenbrand                                                                                \
3641b430aecSDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
3651b430aecSDavid Hildenbrand     }                                                                          \
3661b430aecSDavid Hildenbrand }
3671b430aecSDavid Hildenbrand DEF_VMAO(8, 16)
3681b430aecSDavid Hildenbrand DEF_VMAO(16, 32)
3691b430aecSDavid Hildenbrand DEF_VMAO(32, 64)
3701b430aecSDavid Hildenbrand 
3711b430aecSDavid Hildenbrand #define DEF_VMALO(BITS, TBITS)                                                 \
3721b430aecSDavid Hildenbrand void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3,        \
3731b430aecSDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
3741b430aecSDavid Hildenbrand {                                                                              \
3751b430aecSDavid Hildenbrand     int i, j;                                                                  \
3761b430aecSDavid Hildenbrand                                                                                \
3771b430aecSDavid Hildenbrand     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
3781b430aecSDavid Hildenbrand         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
3791b430aecSDavid Hildenbrand         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
3801b430aecSDavid Hildenbrand         uint##TBITS##_t c = s390_vec_read_element##BITS(v4, j);                \
3811b430aecSDavid Hildenbrand                                                                                \
3821b430aecSDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
3831b430aecSDavid Hildenbrand     }                                                                          \
3841b430aecSDavid Hildenbrand }
3851b430aecSDavid Hildenbrand DEF_VMALO(8, 16)
3861b430aecSDavid Hildenbrand DEF_VMALO(16, 32)
3871b430aecSDavid Hildenbrand DEF_VMALO(32, 64)
3882bf3ee38SDavid Hildenbrand 
3892bf3ee38SDavid Hildenbrand #define DEF_VMH(BITS)                                                          \
3902bf3ee38SDavid Hildenbrand void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3,          \
3912bf3ee38SDavid Hildenbrand                             uint32_t desc)                                     \
3922bf3ee38SDavid Hildenbrand {                                                                              \
3932bf3ee38SDavid Hildenbrand     int i;                                                                     \
3942bf3ee38SDavid Hildenbrand                                                                                \
3952bf3ee38SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
3962bf3ee38SDavid Hildenbrand         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
3972bf3ee38SDavid Hildenbrand         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
3982bf3ee38SDavid Hildenbrand                                                                                \
3992bf3ee38SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
4002bf3ee38SDavid Hildenbrand     }                                                                          \
4012bf3ee38SDavid Hildenbrand }
4022bf3ee38SDavid Hildenbrand DEF_VMH(8)
4032bf3ee38SDavid Hildenbrand DEF_VMH(16)
4042bf3ee38SDavid Hildenbrand 
4052bf3ee38SDavid Hildenbrand #define DEF_VMLH(BITS)                                                         \
4062bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3,         \
4072bf3ee38SDavid Hildenbrand                              uint32_t desc)                                    \
4082bf3ee38SDavid Hildenbrand {                                                                              \
4092bf3ee38SDavid Hildenbrand     int i;                                                                     \
4102bf3ee38SDavid Hildenbrand                                                                                \
4112bf3ee38SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
4122bf3ee38SDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
4132bf3ee38SDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
4142bf3ee38SDavid Hildenbrand                                                                                \
4152bf3ee38SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
4162bf3ee38SDavid Hildenbrand     }                                                                          \
4172bf3ee38SDavid Hildenbrand }
4182bf3ee38SDavid Hildenbrand DEF_VMLH(8)
4192bf3ee38SDavid Hildenbrand DEF_VMLH(16)
4202bf3ee38SDavid Hildenbrand 
4212bf3ee38SDavid Hildenbrand #define DEF_VME(BITS, TBITS)                                                   \
4222bf3ee38SDavid Hildenbrand void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3,          \
4232bf3ee38SDavid Hildenbrand                             uint32_t desc)                                     \
4242bf3ee38SDavid Hildenbrand {                                                                              \
4252bf3ee38SDavid Hildenbrand     int i, j;                                                                  \
4262bf3ee38SDavid Hildenbrand                                                                                \
4272bf3ee38SDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
4282bf3ee38SDavid Hildenbrand         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
4292bf3ee38SDavid Hildenbrand         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
4302bf3ee38SDavid Hildenbrand                                                                                \
4312bf3ee38SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b);                           \
4322bf3ee38SDavid Hildenbrand     }                                                                          \
4332bf3ee38SDavid Hildenbrand }
4342bf3ee38SDavid Hildenbrand DEF_VME(8, 16)
4352bf3ee38SDavid Hildenbrand DEF_VME(16, 32)
4362bf3ee38SDavid Hildenbrand DEF_VME(32, 64)
4372bf3ee38SDavid Hildenbrand 
4382bf3ee38SDavid Hildenbrand #define DEF_VMLE(BITS, TBITS)                                                  \
4392bf3ee38SDavid Hildenbrand void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3,         \
4402bf3ee38SDavid Hildenbrand                              uint32_t desc)                                    \
4412bf3ee38SDavid Hildenbrand {                                                                              \
4422bf3ee38SDavid Hildenbrand     int i, j;                                                                  \
4432bf3ee38SDavid Hildenbrand                                                                                \
4442bf3ee38SDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
4452bf3ee38SDavid Hildenbrand         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
4462bf3ee38SDavid Hildenbrand         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
4472bf3ee38SDavid Hildenbrand                                                                                \
4482bf3ee38SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b);                           \
4492bf3ee38SDavid Hildenbrand     }                                                                          \
4502bf3ee38SDavid Hildenbrand }
4512bf3ee38SDavid Hildenbrand DEF_VMLE(8, 16)
4522bf3ee38SDavid Hildenbrand DEF_VMLE(16, 32)
4532bf3ee38SDavid Hildenbrand DEF_VMLE(32, 64)
4542bf3ee38SDavid Hildenbrand 
4552bf3ee38SDavid Hildenbrand #define DEF_VMO(BITS, TBITS)                                                   \
4562bf3ee38SDavid Hildenbrand void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3,          \
4572bf3ee38SDavid Hildenbrand                             uint32_t desc)                                     \
4582bf3ee38SDavid Hildenbrand {                                                                              \
4592bf3ee38SDavid Hildenbrand     int i, j;                                                                  \
4602bf3ee38SDavid Hildenbrand                                                                                \
4612bf3ee38SDavid Hildenbrand     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
4622bf3ee38SDavid Hildenbrand         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
4632bf3ee38SDavid Hildenbrand         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
4642bf3ee38SDavid Hildenbrand                                                                                \
4652bf3ee38SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b);                           \
4662bf3ee38SDavid Hildenbrand     }                                                                          \
4672bf3ee38SDavid Hildenbrand }
4682bf3ee38SDavid Hildenbrand DEF_VMO(8, 16)
4692bf3ee38SDavid Hildenbrand DEF_VMO(16, 32)
4702bf3ee38SDavid Hildenbrand DEF_VMO(32, 64)
4712bf3ee38SDavid Hildenbrand 
4722bf3ee38SDavid Hildenbrand #define DEF_VMLO(BITS, TBITS)                                                  \
4732bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3,         \
4742bf3ee38SDavid Hildenbrand                              uint32_t desc)                                    \
4752bf3ee38SDavid Hildenbrand {                                                                              \
4762bf3ee38SDavid Hildenbrand     int i, j;                                                                  \
4772bf3ee38SDavid Hildenbrand                                                                                \
4782bf3ee38SDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
4792bf3ee38SDavid Hildenbrand         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
4802bf3ee38SDavid Hildenbrand         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
4812bf3ee38SDavid Hildenbrand                                                                                \
4822bf3ee38SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b);                           \
4832bf3ee38SDavid Hildenbrand     }                                                                          \
4842bf3ee38SDavid Hildenbrand }
4852bf3ee38SDavid Hildenbrand DEF_VMLO(8, 16)
4862bf3ee38SDavid Hildenbrand DEF_VMLO(16, 32)
4872bf3ee38SDavid Hildenbrand DEF_VMLO(32, 64)
488c3838aaaSDavid Hildenbrand 
489c3838aaaSDavid Hildenbrand #define DEF_VPOPCT(BITS)                                                       \
490c3838aaaSDavid Hildenbrand void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc)        \
491c3838aaaSDavid Hildenbrand {                                                                              \
492c3838aaaSDavid Hildenbrand     int i;                                                                     \
493c3838aaaSDavid Hildenbrand                                                                                \
494c3838aaaSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
495c3838aaaSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
496c3838aaaSDavid Hildenbrand                                                                                \
497c3838aaaSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, ctpop32(a));                       \
498c3838aaaSDavid Hildenbrand     }                                                                          \
499c3838aaaSDavid Hildenbrand }
500c3838aaaSDavid Hildenbrand DEF_VPOPCT(8)
501c3838aaaSDavid Hildenbrand DEF_VPOPCT(16)
50255236da2SDavid Hildenbrand 
50355236da2SDavid Hildenbrand #define DEF_VERLLV(BITS)                                                       \
50455236da2SDavid Hildenbrand void HELPER(gvec_verllv##BITS)(void *v1, const void *v2, const void *v3,       \
50555236da2SDavid Hildenbrand                                uint32_t desc)                                  \
50655236da2SDavid Hildenbrand {                                                                              \
50755236da2SDavid Hildenbrand     int i;                                                                     \
50855236da2SDavid Hildenbrand                                                                                \
50955236da2SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
51055236da2SDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
51155236da2SDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
51255236da2SDavid Hildenbrand                                                                                \
51355236da2SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, rol##BITS(a, b));                  \
51455236da2SDavid Hildenbrand     }                                                                          \
51555236da2SDavid Hildenbrand }
51655236da2SDavid Hildenbrand DEF_VERLLV(8)
51755236da2SDavid Hildenbrand DEF_VERLLV(16)
51855236da2SDavid Hildenbrand 
51955236da2SDavid Hildenbrand #define DEF_VERLL(BITS)                                                        \
52055236da2SDavid Hildenbrand void HELPER(gvec_verll##BITS)(void *v1, const void *v2, uint64_t count,        \
52155236da2SDavid Hildenbrand                               uint32_t desc)                                   \
52255236da2SDavid Hildenbrand {                                                                              \
52355236da2SDavid Hildenbrand     int i;                                                                     \
52455236da2SDavid Hildenbrand                                                                                \
52555236da2SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
52655236da2SDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
52755236da2SDavid Hildenbrand                                                                                \
52855236da2SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, rol##BITS(a, count));              \
52955236da2SDavid Hildenbrand     }                                                                          \
53055236da2SDavid Hildenbrand }
53155236da2SDavid Hildenbrand DEF_VERLL(8)
53255236da2SDavid Hildenbrand DEF_VERLL(16)
5335c4b0ab4SDavid Hildenbrand 
5345c4b0ab4SDavid Hildenbrand #define DEF_VERIM(BITS)                                                        \
5355c4b0ab4SDavid Hildenbrand void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3,        \
5365c4b0ab4SDavid Hildenbrand                               uint32_t desc)                                   \
5375c4b0ab4SDavid Hildenbrand {                                                                              \
5385c4b0ab4SDavid Hildenbrand     const uint8_t count = simd_data(desc);                                     \
5395c4b0ab4SDavid Hildenbrand     int i;                                                                     \
5405c4b0ab4SDavid Hildenbrand                                                                                \
5415c4b0ab4SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
5425c4b0ab4SDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i);           \
5435c4b0ab4SDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i);           \
5445c4b0ab4SDavid Hildenbrand         const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i);        \
5455c4b0ab4SDavid Hildenbrand         const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask);   \
5465c4b0ab4SDavid Hildenbrand                                                                                \
5475c4b0ab4SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, d);                                \
5485c4b0ab4SDavid Hildenbrand     }                                                                          \
5495c4b0ab4SDavid Hildenbrand }
5505c4b0ab4SDavid Hildenbrand DEF_VERIM(8)
5515c4b0ab4SDavid Hildenbrand DEF_VERIM(16)
552dea33fc3SDavid Hildenbrand 
553dea33fc3SDavid Hildenbrand void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count,
554dea33fc3SDavid Hildenbrand                       uint32_t desc)
555dea33fc3SDavid Hildenbrand {
556dea33fc3SDavid Hildenbrand     s390_vec_shl(v1, v2, count);
557dea33fc3SDavid Hildenbrand }
5585f724887SDavid Hildenbrand 
5595f724887SDavid Hildenbrand void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count,
5605f724887SDavid Hildenbrand                        uint32_t desc)
5615f724887SDavid Hildenbrand {
5625f724887SDavid Hildenbrand     s390_vec_sar(v1, v2, count);
5635f724887SDavid Hildenbrand }
5648112274fSDavid Hildenbrand 
5658112274fSDavid Hildenbrand void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count,
5668112274fSDavid Hildenbrand                        uint32_t desc)
5678112274fSDavid Hildenbrand {
5688112274fSDavid Hildenbrand     s390_vec_shr(v1, v2, count);
5698112274fSDavid Hildenbrand }
570*1ee2d7baSDavid Hildenbrand 
571*1ee2d7baSDavid Hildenbrand #define DEF_VSCBI(BITS)                                                        \
572*1ee2d7baSDavid Hildenbrand void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3,        \
573*1ee2d7baSDavid Hildenbrand                               uint32_t desc)                                   \
574*1ee2d7baSDavid Hildenbrand {                                                                              \
575*1ee2d7baSDavid Hildenbrand     int i;                                                                     \
576*1ee2d7baSDavid Hildenbrand                                                                                \
577*1ee2d7baSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
578*1ee2d7baSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
579*1ee2d7baSDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
580*1ee2d7baSDavid Hildenbrand                                                                                \
581*1ee2d7baSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, a < b);                            \
582*1ee2d7baSDavid Hildenbrand     }                                                                          \
583*1ee2d7baSDavid Hildenbrand }
584*1ee2d7baSDavid Hildenbrand DEF_VSCBI(8)
585*1ee2d7baSDavid Hildenbrand DEF_VSCBI(16)
586