xref: /qemu/target/s390x/tcg/vec_int_helper.c (revision 2bf3ee38f1f8462d5267ce584182cfe448398e10)
/*
 * QEMU TCG support -- s390x vector integer instruction support
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
12c1a81d4bSDavid Hildenbrand #include "qemu/osdep.h"
13c1a81d4bSDavid Hildenbrand #include "qemu-common.h"
14c1a81d4bSDavid Hildenbrand #include "cpu.h"
15c1a81d4bSDavid Hildenbrand #include "vec.h"
16c1a81d4bSDavid Hildenbrand #include "exec/helper-proto.h"
17c1a81d4bSDavid Hildenbrand 
18697a45d6SDavid Hildenbrand static bool s390_vec_is_zero(const S390Vector *v)
19697a45d6SDavid Hildenbrand {
20697a45d6SDavid Hildenbrand     return !v->doubleword[0] && !v->doubleword[1];
21697a45d6SDavid Hildenbrand }
22697a45d6SDavid Hildenbrand 
23697a45d6SDavid Hildenbrand static void s390_vec_xor(S390Vector *res, const S390Vector *a,
24697a45d6SDavid Hildenbrand                          const S390Vector *b)
25697a45d6SDavid Hildenbrand {
26697a45d6SDavid Hildenbrand     res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0];
27697a45d6SDavid Hildenbrand     res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1];
28697a45d6SDavid Hildenbrand }
29697a45d6SDavid Hildenbrand 
30697a45d6SDavid Hildenbrand static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
31697a45d6SDavid Hildenbrand {
32697a45d6SDavid Hildenbrand     uint64_t tmp;
33697a45d6SDavid Hildenbrand 
34697a45d6SDavid Hildenbrand     g_assert(count < 128);
35697a45d6SDavid Hildenbrand     if (count == 0) {
36697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[0];
37697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[1];
38697a45d6SDavid Hildenbrand     } else if (count == 64) {
39697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[1];
40697a45d6SDavid Hildenbrand         d->doubleword[1] = 0;
41697a45d6SDavid Hildenbrand     } else if (count < 64) {
42697a45d6SDavid Hildenbrand         tmp = extract64(a->doubleword[1], 64 - count, count);
43697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[1] << count;
44697a45d6SDavid Hildenbrand         d->doubleword[0] = (a->doubleword[0] << count) | tmp;
45697a45d6SDavid Hildenbrand     } else {
46697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[1] << (count - 64);
47697a45d6SDavid Hildenbrand         d->doubleword[1] = 0;
48697a45d6SDavid Hildenbrand     }
49697a45d6SDavid Hildenbrand }
50697a45d6SDavid Hildenbrand 
51697a45d6SDavid Hildenbrand static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
52697a45d6SDavid Hildenbrand {
53697a45d6SDavid Hildenbrand     uint64_t tmp;
54697a45d6SDavid Hildenbrand 
55697a45d6SDavid Hildenbrand     g_assert(count < 128);
56697a45d6SDavid Hildenbrand     if (count == 0) {
57697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[0];
58697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[1];
59697a45d6SDavid Hildenbrand     } else if (count == 64) {
60697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[0];
61697a45d6SDavid Hildenbrand         d->doubleword[0] = 0;
62697a45d6SDavid Hildenbrand     } else if (count < 64) {
63697a45d6SDavid Hildenbrand         tmp = a->doubleword[1] >> count;
64697a45d6SDavid Hildenbrand         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
65697a45d6SDavid Hildenbrand         d->doubleword[0] = a->doubleword[0] >> count;
66697a45d6SDavid Hildenbrand     } else {
67697a45d6SDavid Hildenbrand         d->doubleword[1] = a->doubleword[0] >> (count - 64);
68697a45d6SDavid Hildenbrand         d->doubleword[0] = 0;
69697a45d6SDavid Hildenbrand     }
70697a45d6SDavid Hildenbrand }
71c1a81d4bSDavid Hildenbrand #define DEF_VAVG(BITS)                                                         \
72c1a81d4bSDavid Hildenbrand void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3,         \
73c1a81d4bSDavid Hildenbrand                              uint32_t desc)                                    \
74c1a81d4bSDavid Hildenbrand {                                                                              \
75c1a81d4bSDavid Hildenbrand     int i;                                                                     \
76c1a81d4bSDavid Hildenbrand                                                                                \
77c1a81d4bSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
78c1a81d4bSDavid Hildenbrand         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
79c1a81d4bSDavid Hildenbrand         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
80c1a81d4bSDavid Hildenbrand                                                                                \
81c1a81d4bSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
82c1a81d4bSDavid Hildenbrand     }                                                                          \
83c1a81d4bSDavid Hildenbrand }
84c1a81d4bSDavid Hildenbrand DEF_VAVG(8)
85c1a81d4bSDavid Hildenbrand DEF_VAVG(16)
86801aa78bSDavid Hildenbrand 
87801aa78bSDavid Hildenbrand #define DEF_VAVGL(BITS)                                                        \
88801aa78bSDavid Hildenbrand void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3,        \
89801aa78bSDavid Hildenbrand                               uint32_t desc)                                   \
90801aa78bSDavid Hildenbrand {                                                                              \
91801aa78bSDavid Hildenbrand     int i;                                                                     \
92801aa78bSDavid Hildenbrand                                                                                \
93801aa78bSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
94801aa78bSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
95801aa78bSDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
96801aa78bSDavid Hildenbrand                                                                                \
97801aa78bSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
98801aa78bSDavid Hildenbrand     }                                                                          \
99801aa78bSDavid Hildenbrand }
100801aa78bSDavid Hildenbrand DEF_VAVGL(8)
101801aa78bSDavid Hildenbrand DEF_VAVGL(16)
10228863f1dSDavid Hildenbrand 
10328863f1dSDavid Hildenbrand #define DEF_VCLZ(BITS)                                                         \
10428863f1dSDavid Hildenbrand void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc)          \
10528863f1dSDavid Hildenbrand {                                                                              \
10628863f1dSDavid Hildenbrand     int i;                                                                     \
10728863f1dSDavid Hildenbrand                                                                                \
10828863f1dSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
10928863f1dSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
11028863f1dSDavid Hildenbrand                                                                                \
11128863f1dSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS);             \
11228863f1dSDavid Hildenbrand     }                                                                          \
11328863f1dSDavid Hildenbrand }
11428863f1dSDavid Hildenbrand DEF_VCLZ(8)
11528863f1dSDavid Hildenbrand DEF_VCLZ(16)
116449a8ac2SDavid Hildenbrand 
117449a8ac2SDavid Hildenbrand #define DEF_VCTZ(BITS)                                                         \
118449a8ac2SDavid Hildenbrand void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc)          \
119449a8ac2SDavid Hildenbrand {                                                                              \
120449a8ac2SDavid Hildenbrand     int i;                                                                     \
121449a8ac2SDavid Hildenbrand                                                                                \
122449a8ac2SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
123449a8ac2SDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
124449a8ac2SDavid Hildenbrand                                                                                \
125449a8ac2SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS);              \
126449a8ac2SDavid Hildenbrand     }                                                                          \
127449a8ac2SDavid Hildenbrand }
128449a8ac2SDavid Hildenbrand DEF_VCTZ(8)
129449a8ac2SDavid Hildenbrand DEF_VCTZ(16)
130697a45d6SDavid Hildenbrand 
/*
 * Carry-less multiplication: like binary long multiplication, but the partial
 * products are combined with XOR instead of addition.
 *
 * galois_multiply##BITS() takes and returns TBITS-wide values. Bits shifted
 * out of the TBITS-wide multiplicand are lost, so the full product is only
 * obtained when the operands fit into BITS (= TBITS / 2) bits.
 */
#define DEF_GALOIS_MULTIPLY(BITS, TBITS)                                       \
static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a,                \
                                             uint##TBITS##_t b)                \
{                                                                              \
    uint##TBITS##_t product = 0;                                               \
                                                                               \
    for (; b; a <<= 1, b >>= 1) {                                              \
        if (b & 0x1) {                                                         \
            product ^= a;                                                      \
        }                                                                      \
    }                                                                          \
    return product;                                                            \
}
DEF_GALOIS_MULTIPLY(8, 16)
DEF_GALOIS_MULTIPLY(16, 32)
DEF_GALOIS_MULTIPLY(32, 64)
150697a45d6SDavid Hildenbrand 
151697a45d6SDavid Hildenbrand static S390Vector galois_multiply64(uint64_t a, uint64_t b)
152697a45d6SDavid Hildenbrand {
153697a45d6SDavid Hildenbrand     S390Vector res = {};
154697a45d6SDavid Hildenbrand     S390Vector va = {
155697a45d6SDavid Hildenbrand         .doubleword[1] = a,
156697a45d6SDavid Hildenbrand     };
157697a45d6SDavid Hildenbrand     S390Vector vb = {
158697a45d6SDavid Hildenbrand         .doubleword[1] = b,
159697a45d6SDavid Hildenbrand     };
160697a45d6SDavid Hildenbrand 
161697a45d6SDavid Hildenbrand     while (!s390_vec_is_zero(&vb)) {
162697a45d6SDavid Hildenbrand         if (vb.doubleword[1] & 0x1) {
163697a45d6SDavid Hildenbrand             s390_vec_xor(&res, &res, &va);
164697a45d6SDavid Hildenbrand         }
165697a45d6SDavid Hildenbrand         s390_vec_shl(&va, &va, 1);
166697a45d6SDavid Hildenbrand         s390_vec_shr(&vb, &vb, 1);
167697a45d6SDavid Hildenbrand     }
168697a45d6SDavid Hildenbrand     return res;
169697a45d6SDavid Hildenbrand }
170697a45d6SDavid Hildenbrand 
171697a45d6SDavid Hildenbrand #define DEF_VGFM(BITS, TBITS)                                                  \
172697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3,         \
173697a45d6SDavid Hildenbrand                              uint32_t desc)                                    \
174697a45d6SDavid Hildenbrand {                                                                              \
175697a45d6SDavid Hildenbrand     int i;                                                                     \
176697a45d6SDavid Hildenbrand                                                                                \
177697a45d6SDavid Hildenbrand     for (i = 0; i < (128 / TBITS); i++) {                                      \
178697a45d6SDavid Hildenbrand         uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
179697a45d6SDavid Hildenbrand         uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
180697a45d6SDavid Hildenbrand         uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
181697a45d6SDavid Hildenbrand                                                                                \
182697a45d6SDavid Hildenbrand         a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
183697a45d6SDavid Hildenbrand         b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
184697a45d6SDavid Hildenbrand         d = d ^ galois_multiply32(a, b);                                       \
185697a45d6SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, d);                               \
186697a45d6SDavid Hildenbrand     }                                                                          \
187697a45d6SDavid Hildenbrand }
188697a45d6SDavid Hildenbrand DEF_VGFM(8, 16)
189697a45d6SDavid Hildenbrand DEF_VGFM(16, 32)
190697a45d6SDavid Hildenbrand DEF_VGFM(32, 64)
191697a45d6SDavid Hildenbrand 
192697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
193697a45d6SDavid Hildenbrand                          uint32_t desc)
194697a45d6SDavid Hildenbrand {
195697a45d6SDavid Hildenbrand     S390Vector tmp1, tmp2;
196697a45d6SDavid Hildenbrand     uint64_t a, b;
197697a45d6SDavid Hildenbrand 
198697a45d6SDavid Hildenbrand     a = s390_vec_read_element64(v2, 0);
199697a45d6SDavid Hildenbrand     b = s390_vec_read_element64(v3, 0);
200697a45d6SDavid Hildenbrand     tmp1 = galois_multiply64(a, b);
201697a45d6SDavid Hildenbrand     a = s390_vec_read_element64(v2, 1);
202697a45d6SDavid Hildenbrand     b = s390_vec_read_element64(v3, 1);
203697a45d6SDavid Hildenbrand     tmp2 = galois_multiply64(a, b);
204697a45d6SDavid Hildenbrand     s390_vec_xor(v1, &tmp1, &tmp2);
205697a45d6SDavid Hildenbrand }
206697a45d6SDavid Hildenbrand 
207697a45d6SDavid Hildenbrand #define DEF_VGFMA(BITS, TBITS)                                                 \
208697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3,        \
209697a45d6SDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
210697a45d6SDavid Hildenbrand {                                                                              \
211697a45d6SDavid Hildenbrand     int i;                                                                     \
212697a45d6SDavid Hildenbrand                                                                                \
213697a45d6SDavid Hildenbrand     for (i = 0; i < (128 / TBITS); i++) {                                      \
214697a45d6SDavid Hildenbrand         uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
215697a45d6SDavid Hildenbrand         uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
216697a45d6SDavid Hildenbrand         uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
217697a45d6SDavid Hildenbrand                                                                                \
218697a45d6SDavid Hildenbrand         a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
219697a45d6SDavid Hildenbrand         b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
220697a45d6SDavid Hildenbrand         d = d ^ galois_multiply32(a, b);                                       \
221697a45d6SDavid Hildenbrand         d = d ^ s390_vec_read_element##TBITS(v4, i);                           \
222697a45d6SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, d);                               \
223697a45d6SDavid Hildenbrand     }                                                                          \
224697a45d6SDavid Hildenbrand }
225697a45d6SDavid Hildenbrand DEF_VGFMA(8, 16)
226697a45d6SDavid Hildenbrand DEF_VGFMA(16, 32)
227697a45d6SDavid Hildenbrand DEF_VGFMA(32, 64)
228697a45d6SDavid Hildenbrand 
229697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
230697a45d6SDavid Hildenbrand                           const void *v4, uint32_t desc)
231697a45d6SDavid Hildenbrand {
232697a45d6SDavid Hildenbrand     S390Vector tmp1, tmp2;
233697a45d6SDavid Hildenbrand     uint64_t a, b;
234697a45d6SDavid Hildenbrand 
235697a45d6SDavid Hildenbrand     a = s390_vec_read_element64(v2, 0);
236697a45d6SDavid Hildenbrand     b = s390_vec_read_element64(v3, 0);
237697a45d6SDavid Hildenbrand     tmp1 = galois_multiply64(a, b);
238697a45d6SDavid Hildenbrand     a = s390_vec_read_element64(v2, 1);
239697a45d6SDavid Hildenbrand     b = s390_vec_read_element64(v3, 1);
240697a45d6SDavid Hildenbrand     tmp2 = galois_multiply64(a, b);
241697a45d6SDavid Hildenbrand     s390_vec_xor(&tmp1, &tmp1, &tmp2);
242697a45d6SDavid Hildenbrand     s390_vec_xor(v1, &tmp1, v4);
243697a45d6SDavid Hildenbrand }
2441b430aecSDavid Hildenbrand 
2451b430aecSDavid Hildenbrand #define DEF_VMAL(BITS)                                                         \
2461b430aecSDavid Hildenbrand void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3,         \
2471b430aecSDavid Hildenbrand                              const void *v4, uint32_t desc)                    \
2481b430aecSDavid Hildenbrand {                                                                              \
2491b430aecSDavid Hildenbrand     int i;                                                                     \
2501b430aecSDavid Hildenbrand                                                                                \
2511b430aecSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
2521b430aecSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
2531b430aecSDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
2541b430aecSDavid Hildenbrand         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
2551b430aecSDavid Hildenbrand                                                                                \
2561b430aecSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, a * b + c);                        \
2571b430aecSDavid Hildenbrand     }                                                                          \
2581b430aecSDavid Hildenbrand }
2591b430aecSDavid Hildenbrand DEF_VMAL(8)
2601b430aecSDavid Hildenbrand DEF_VMAL(16)
2611b430aecSDavid Hildenbrand 
2621b430aecSDavid Hildenbrand #define DEF_VMAH(BITS)                                                         \
2631b430aecSDavid Hildenbrand void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3,         \
2641b430aecSDavid Hildenbrand                              const void *v4, uint32_t desc)                    \
2651b430aecSDavid Hildenbrand {                                                                              \
2661b430aecSDavid Hildenbrand     int i;                                                                     \
2671b430aecSDavid Hildenbrand                                                                                \
2681b430aecSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
2691b430aecSDavid Hildenbrand         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
2701b430aecSDavid Hildenbrand         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
2711b430aecSDavid Hildenbrand         const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i);   \
2721b430aecSDavid Hildenbrand                                                                                \
2731b430aecSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
2741b430aecSDavid Hildenbrand     }                                                                          \
2751b430aecSDavid Hildenbrand }
2761b430aecSDavid Hildenbrand DEF_VMAH(8)
2771b430aecSDavid Hildenbrand DEF_VMAH(16)
2781b430aecSDavid Hildenbrand 
2791b430aecSDavid Hildenbrand #define DEF_VMALH(BITS)                                                        \
2801b430aecSDavid Hildenbrand void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3,        \
2811b430aecSDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
2821b430aecSDavid Hildenbrand {                                                                              \
2831b430aecSDavid Hildenbrand     int i;                                                                     \
2841b430aecSDavid Hildenbrand                                                                                \
2851b430aecSDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
2861b430aecSDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
2871b430aecSDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
2881b430aecSDavid Hildenbrand         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
2891b430aecSDavid Hildenbrand                                                                                \
2901b430aecSDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
2911b430aecSDavid Hildenbrand     }                                                                          \
2921b430aecSDavid Hildenbrand }
2931b430aecSDavid Hildenbrand DEF_VMALH(8)
2941b430aecSDavid Hildenbrand DEF_VMALH(16)
2951b430aecSDavid Hildenbrand 
2961b430aecSDavid Hildenbrand #define DEF_VMAE(BITS, TBITS)                                                  \
2971b430aecSDavid Hildenbrand void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3,         \
2981b430aecSDavid Hildenbrand                              const void *v4, uint32_t desc)                    \
2991b430aecSDavid Hildenbrand {                                                                              \
3001b430aecSDavid Hildenbrand     int i, j;                                                                  \
3011b430aecSDavid Hildenbrand                                                                                \
3021b430aecSDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
3031b430aecSDavid Hildenbrand         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
3041b430aecSDavid Hildenbrand         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
3051b430aecSDavid Hildenbrand         int##TBITS##_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, j);  \
3061b430aecSDavid Hildenbrand                                                                                \
3071b430aecSDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
3081b430aecSDavid Hildenbrand     }                                                                          \
3091b430aecSDavid Hildenbrand }
3101b430aecSDavid Hildenbrand DEF_VMAE(8, 16)
3111b430aecSDavid Hildenbrand DEF_VMAE(16, 32)
3121b430aecSDavid Hildenbrand DEF_VMAE(32, 64)
3131b430aecSDavid Hildenbrand 
3141b430aecSDavid Hildenbrand #define DEF_VMALE(BITS, TBITS)                                                 \
3151b430aecSDavid Hildenbrand void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3,        \
3161b430aecSDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
3171b430aecSDavid Hildenbrand {                                                                              \
3181b430aecSDavid Hildenbrand     int i, j;                                                                  \
3191b430aecSDavid Hildenbrand                                                                                \
3201b430aecSDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
3211b430aecSDavid Hildenbrand         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
3221b430aecSDavid Hildenbrand         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
3231b430aecSDavid Hildenbrand         uint##TBITS##_t c = s390_vec_read_element##BITS(v4, j);                \
3241b430aecSDavid Hildenbrand                                                                                \
3251b430aecSDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
3261b430aecSDavid Hildenbrand     }                                                                          \
3271b430aecSDavid Hildenbrand }
3281b430aecSDavid Hildenbrand DEF_VMALE(8, 16)
3291b430aecSDavid Hildenbrand DEF_VMALE(16, 32)
3301b430aecSDavid Hildenbrand DEF_VMALE(32, 64)
3311b430aecSDavid Hildenbrand 
3321b430aecSDavid Hildenbrand #define DEF_VMAO(BITS, TBITS)                                                  \
3331b430aecSDavid Hildenbrand void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3,         \
3341b430aecSDavid Hildenbrand                              const void *v4, uint32_t desc)                    \
3351b430aecSDavid Hildenbrand {                                                                              \
3361b430aecSDavid Hildenbrand     int i, j;                                                                  \
3371b430aecSDavid Hildenbrand                                                                                \
3381b430aecSDavid Hildenbrand     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
3391b430aecSDavid Hildenbrand         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
3401b430aecSDavid Hildenbrand         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
3411b430aecSDavid Hildenbrand         int##TBITS##_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, j);  \
3421b430aecSDavid Hildenbrand                                                                                \
3431b430aecSDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
3441b430aecSDavid Hildenbrand     }                                                                          \
3451b430aecSDavid Hildenbrand }
3461b430aecSDavid Hildenbrand DEF_VMAO(8, 16)
3471b430aecSDavid Hildenbrand DEF_VMAO(16, 32)
3481b430aecSDavid Hildenbrand DEF_VMAO(32, 64)
3491b430aecSDavid Hildenbrand 
3501b430aecSDavid Hildenbrand #define DEF_VMALO(BITS, TBITS)                                                 \
3511b430aecSDavid Hildenbrand void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3,        \
3521b430aecSDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
3531b430aecSDavid Hildenbrand {                                                                              \
3541b430aecSDavid Hildenbrand     int i, j;                                                                  \
3551b430aecSDavid Hildenbrand                                                                                \
3561b430aecSDavid Hildenbrand     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
3571b430aecSDavid Hildenbrand         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
3581b430aecSDavid Hildenbrand         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
3591b430aecSDavid Hildenbrand         uint##TBITS##_t c = s390_vec_read_element##BITS(v4, j);                \
3601b430aecSDavid Hildenbrand                                                                                \
3611b430aecSDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
3621b430aecSDavid Hildenbrand     }                                                                          \
3631b430aecSDavid Hildenbrand }
3641b430aecSDavid Hildenbrand DEF_VMALO(8, 16)
3651b430aecSDavid Hildenbrand DEF_VMALO(16, 32)
3661b430aecSDavid Hildenbrand DEF_VMALO(32, 64)
367*2bf3ee38SDavid Hildenbrand 
368*2bf3ee38SDavid Hildenbrand #define DEF_VMH(BITS)                                                          \
369*2bf3ee38SDavid Hildenbrand void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3,          \
370*2bf3ee38SDavid Hildenbrand                             uint32_t desc)                                     \
371*2bf3ee38SDavid Hildenbrand {                                                                              \
372*2bf3ee38SDavid Hildenbrand     int i;                                                                     \
373*2bf3ee38SDavid Hildenbrand                                                                                \
374*2bf3ee38SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
375*2bf3ee38SDavid Hildenbrand         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
376*2bf3ee38SDavid Hildenbrand         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
377*2bf3ee38SDavid Hildenbrand                                                                                \
378*2bf3ee38SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
379*2bf3ee38SDavid Hildenbrand     }                                                                          \
380*2bf3ee38SDavid Hildenbrand }
381*2bf3ee38SDavid Hildenbrand DEF_VMH(8)
382*2bf3ee38SDavid Hildenbrand DEF_VMH(16)
383*2bf3ee38SDavid Hildenbrand 
384*2bf3ee38SDavid Hildenbrand #define DEF_VMLH(BITS)                                                         \
385*2bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3,         \
386*2bf3ee38SDavid Hildenbrand                              uint32_t desc)                                    \
387*2bf3ee38SDavid Hildenbrand {                                                                              \
388*2bf3ee38SDavid Hildenbrand     int i;                                                                     \
389*2bf3ee38SDavid Hildenbrand                                                                                \
390*2bf3ee38SDavid Hildenbrand     for (i = 0; i < (128 / BITS); i++) {                                       \
391*2bf3ee38SDavid Hildenbrand         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
392*2bf3ee38SDavid Hildenbrand         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
393*2bf3ee38SDavid Hildenbrand                                                                                \
394*2bf3ee38SDavid Hildenbrand         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
395*2bf3ee38SDavid Hildenbrand     }                                                                          \
396*2bf3ee38SDavid Hildenbrand }
397*2bf3ee38SDavid Hildenbrand DEF_VMLH(8)
398*2bf3ee38SDavid Hildenbrand DEF_VMLH(16)
399*2bf3ee38SDavid Hildenbrand 
400*2bf3ee38SDavid Hildenbrand #define DEF_VME(BITS, TBITS)                                                   \
401*2bf3ee38SDavid Hildenbrand void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3,          \
402*2bf3ee38SDavid Hildenbrand                             uint32_t desc)                                     \
403*2bf3ee38SDavid Hildenbrand {                                                                              \
404*2bf3ee38SDavid Hildenbrand     int i, j;                                                                  \
405*2bf3ee38SDavid Hildenbrand                                                                                \
406*2bf3ee38SDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
407*2bf3ee38SDavid Hildenbrand         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
408*2bf3ee38SDavid Hildenbrand         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
409*2bf3ee38SDavid Hildenbrand                                                                                \
410*2bf3ee38SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b);                           \
411*2bf3ee38SDavid Hildenbrand     }                                                                          \
412*2bf3ee38SDavid Hildenbrand }
413*2bf3ee38SDavid Hildenbrand DEF_VME(8, 16)
414*2bf3ee38SDavid Hildenbrand DEF_VME(16, 32)
415*2bf3ee38SDavid Hildenbrand DEF_VME(32, 64)
416*2bf3ee38SDavid Hildenbrand 
417*2bf3ee38SDavid Hildenbrand #define DEF_VMLE(BITS, TBITS)                                                  \
418*2bf3ee38SDavid Hildenbrand void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3,         \
419*2bf3ee38SDavid Hildenbrand                              uint32_t desc)                                    \
420*2bf3ee38SDavid Hildenbrand {                                                                              \
421*2bf3ee38SDavid Hildenbrand     int i, j;                                                                  \
422*2bf3ee38SDavid Hildenbrand                                                                                \
423*2bf3ee38SDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
424*2bf3ee38SDavid Hildenbrand         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
425*2bf3ee38SDavid Hildenbrand         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
426*2bf3ee38SDavid Hildenbrand                                                                                \
427*2bf3ee38SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b);                           \
428*2bf3ee38SDavid Hildenbrand     }                                                                          \
429*2bf3ee38SDavid Hildenbrand }
430*2bf3ee38SDavid Hildenbrand DEF_VMLE(8, 16)
431*2bf3ee38SDavid Hildenbrand DEF_VMLE(16, 32)
432*2bf3ee38SDavid Hildenbrand DEF_VMLE(32, 64)
433*2bf3ee38SDavid Hildenbrand 
434*2bf3ee38SDavid Hildenbrand #define DEF_VMO(BITS, TBITS)                                                   \
435*2bf3ee38SDavid Hildenbrand void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3,          \
436*2bf3ee38SDavid Hildenbrand                             uint32_t desc)                                     \
437*2bf3ee38SDavid Hildenbrand {                                                                              \
438*2bf3ee38SDavid Hildenbrand     int i, j;                                                                  \
439*2bf3ee38SDavid Hildenbrand                                                                                \
440*2bf3ee38SDavid Hildenbrand     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
441*2bf3ee38SDavid Hildenbrand         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
442*2bf3ee38SDavid Hildenbrand         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
443*2bf3ee38SDavid Hildenbrand                                                                                \
444*2bf3ee38SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b);                           \
445*2bf3ee38SDavid Hildenbrand     }                                                                          \
446*2bf3ee38SDavid Hildenbrand }
447*2bf3ee38SDavid Hildenbrand DEF_VMO(8, 16)
448*2bf3ee38SDavid Hildenbrand DEF_VMO(16, 32)
449*2bf3ee38SDavid Hildenbrand DEF_VMO(32, 64)
450*2bf3ee38SDavid Hildenbrand 
451*2bf3ee38SDavid Hildenbrand #define DEF_VMLO(BITS, TBITS)                                                  \
452*2bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3,         \
453*2bf3ee38SDavid Hildenbrand                              uint32_t desc)                                    \
454*2bf3ee38SDavid Hildenbrand {                                                                              \
455*2bf3ee38SDavid Hildenbrand     int i, j;                                                                  \
456*2bf3ee38SDavid Hildenbrand                                                                                \
457*2bf3ee38SDavid Hildenbrand     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
458*2bf3ee38SDavid Hildenbrand         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
459*2bf3ee38SDavid Hildenbrand         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
460*2bf3ee38SDavid Hildenbrand                                                                                \
461*2bf3ee38SDavid Hildenbrand         s390_vec_write_element##TBITS(v1, i, a * b);                           \
462*2bf3ee38SDavid Hildenbrand     }                                                                          \
463*2bf3ee38SDavid Hildenbrand }
464*2bf3ee38SDavid Hildenbrand DEF_VMLO(8, 16)
465*2bf3ee38SDavid Hildenbrand DEF_VMLO(16, 32)
466*2bf3ee38SDavid Hildenbrand DEF_VMLO(32, 64)
467