/*
 * QEMU TCG support -- s390x vector integer instruction support
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
12c1a81d4bSDavid Hildenbrand #include "qemu/osdep.h"
13c1a81d4bSDavid Hildenbrand #include "cpu.h"
14c1a81d4bSDavid Hildenbrand #include "vec.h"
15c1a81d4bSDavid Hildenbrand #include "exec/helper-proto.h"
165c4b0ab4SDavid Hildenbrand #include "tcg/tcg-gvec-desc.h"
172d8bc681SRichard Henderson #include "crypto/clmul.h"
18c1a81d4bSDavid Hildenbrand
s390_vec_is_zero(const S390Vector * v)19697a45d6SDavid Hildenbrand static bool s390_vec_is_zero(const S390Vector *v)
20697a45d6SDavid Hildenbrand {
21697a45d6SDavid Hildenbrand return !v->doubleword[0] && !v->doubleword[1];
22697a45d6SDavid Hildenbrand }
23697a45d6SDavid Hildenbrand
/*
 * Store the bitwise AND of @a and @b into @res, one doubleword at a time.
 * Each result doubleword depends only on the matching input doublewords.
 */
static void s390_vec_and(S390Vector *res, const S390Vector *a,
                         const S390Vector *b)
{
    int idx;

    for (idx = 0; idx < 2; idx++) {
        res->doubleword[idx] = a->doubleword[idx] & b->doubleword[idx];
    }
}
30db156ebfSDavid Hildenbrand
s390_vec_equal(const S390Vector * a,const S390Vector * b)31db156ebfSDavid Hildenbrand static bool s390_vec_equal(const S390Vector *a, const S390Vector *b)
32db156ebfSDavid Hildenbrand {
33db156ebfSDavid Hildenbrand return a->doubleword[0] == b->doubleword[0] &&
34db156ebfSDavid Hildenbrand a->doubleword[1] == b->doubleword[1];
35db156ebfSDavid Hildenbrand }
36db156ebfSDavid Hildenbrand
/*
 * Shift the 128-bit value in @a left by @count bits and store it in @d.
 * doubleword[0] is treated as the more significant half. @count must be
 * below 128 (asserted). Both inputs are read before anything is written.
 */
static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
{
    uint64_t hi, lo;

    g_assert(count < 128);
    hi = a->doubleword[0];
    lo = a->doubleword[1];

    if (count >= 64) {
        /* Everything from the high half is shifted out. */
        d->doubleword[0] = lo << (count - 64);
        d->doubleword[1] = 0;
    } else if (count != 0) {
        /* The top @count bits of the low half move into the high half. */
        d->doubleword[0] = (hi << count) | extract64(lo, 64 - count, count);
        d->doubleword[1] = lo << count;
    } else {
        d->doubleword[0] = hi;
        d->doubleword[1] = lo;
    }
}
57697a45d6SDavid Hildenbrand
/*
 * Arithmetic right shift of the 128-bit value in @a by @count bits, stored
 * in @d. doubleword[0] is treated as the more significant half and supplies
 * the sign.
 *
 * @count must be below 128. This is asserted, as in s390_vec_shl() and
 * s390_vec_shr(), because a larger count would reach a shift by 64 or more
 * in the final branch, which is undefined.
 */
static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count)
{
    uint64_t tmp;

    g_assert(count < 128);
    if (count == 0) {
        d->doubleword[0] = a->doubleword[0];
        d->doubleword[1] = a->doubleword[1];
    } else if (count == 64) {
        /* Compute the sign fill first, in case @d aliases @a. */
        tmp = (int64_t)a->doubleword[0] >> 63;
        d->doubleword[1] = a->doubleword[0];
        d->doubleword[0] = tmp;
    } else if (count < 64) {
        /* The low @count bits of the high half move into the low half. */
        tmp = a->doubleword[1] >> count;
        d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
        d->doubleword[0] = (int64_t)a->doubleword[0] >> count;
    } else {
        /* 64 < count < 128: the high half becomes pure sign fill. */
        tmp = (int64_t)a->doubleword[0] >> 63;
        d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64);
        d->doubleword[0] = tmp;
    }
}
795f724887SDavid Hildenbrand
/*
 * Logical right shift of the 128-bit value in @a by @count bits, stored in
 * @d. doubleword[0] is treated as the more significant half. @count must be
 * below 128 (asserted). Both inputs are read before anything is written.
 */
static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
{
    uint64_t hi, lo;

    g_assert(count < 128);
    hi = a->doubleword[0];
    lo = a->doubleword[1];

    if (count >= 64) {
        /* Everything from the low half is shifted out. */
        d->doubleword[1] = hi >> (count - 64);
        d->doubleword[0] = 0;
    } else if (count != 0) {
        /* The low @count bits of the high half move into the low half. */
        d->doubleword[1] = deposit64(lo >> count, 64 - count, count, hi);
        d->doubleword[0] = hi >> count;
    } else {
        d->doubleword[0] = hi;
        d->doubleword[1] = lo;
    }
}
100c1a81d4bSDavid Hildenbrand #define DEF_VAVG(BITS) \
101c1a81d4bSDavid Hildenbrand void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \
102c1a81d4bSDavid Hildenbrand uint32_t desc) \
103c1a81d4bSDavid Hildenbrand { \
104c1a81d4bSDavid Hildenbrand int i; \
105c1a81d4bSDavid Hildenbrand \
106c1a81d4bSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \
107c1a81d4bSDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
108c1a81d4bSDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
109c1a81d4bSDavid Hildenbrand \
110c1a81d4bSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \
111c1a81d4bSDavid Hildenbrand } \
112c1a81d4bSDavid Hildenbrand }
113c1a81d4bSDavid Hildenbrand DEF_VAVG(8)
114c1a81d4bSDavid Hildenbrand DEF_VAVG(16)
115801aa78bSDavid Hildenbrand
116801aa78bSDavid Hildenbrand #define DEF_VAVGL(BITS) \
117801aa78bSDavid Hildenbrand void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \
118801aa78bSDavid Hildenbrand uint32_t desc) \
119801aa78bSDavid Hildenbrand { \
120801aa78bSDavid Hildenbrand int i; \
121801aa78bSDavid Hildenbrand \
122801aa78bSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \
123801aa78bSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
124801aa78bSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
125801aa78bSDavid Hildenbrand \
126801aa78bSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \
127801aa78bSDavid Hildenbrand } \
128801aa78bSDavid Hildenbrand }
129801aa78bSDavid Hildenbrand DEF_VAVGL(8)
130801aa78bSDavid Hildenbrand DEF_VAVGL(16)
13128863f1dSDavid Hildenbrand
13228863f1dSDavid Hildenbrand #define DEF_VCLZ(BITS) \
13328863f1dSDavid Hildenbrand void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \
13428863f1dSDavid Hildenbrand { \
13528863f1dSDavid Hildenbrand int i; \
13628863f1dSDavid Hildenbrand \
13728863f1dSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \
13828863f1dSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
13928863f1dSDavid Hildenbrand \
14028863f1dSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \
14128863f1dSDavid Hildenbrand } \
14228863f1dSDavid Hildenbrand }
14328863f1dSDavid Hildenbrand DEF_VCLZ(8)
14428863f1dSDavid Hildenbrand DEF_VCLZ(16)
145449a8ac2SDavid Hildenbrand
146449a8ac2SDavid Hildenbrand #define DEF_VCTZ(BITS) \
147449a8ac2SDavid Hildenbrand void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \
148449a8ac2SDavid Hildenbrand { \
149449a8ac2SDavid Hildenbrand int i; \
150449a8ac2SDavid Hildenbrand \
151449a8ac2SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \
152449a8ac2SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
153449a8ac2SDavid Hildenbrand \
154449a8ac2SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \
155449a8ac2SDavid Hildenbrand } \
156449a8ac2SDavid Hildenbrand }
157449a8ac2SDavid Hildenbrand DEF_VCTZ(8)
158449a8ac2SDavid Hildenbrand DEF_VCTZ(16)
159697a45d6SDavid Hildenbrand
/* Like binary multiplication, but with XOR instead of addition. */
161697a45d6SDavid Hildenbrand
/*
 * There is no carry across the two doublewords, so the order in which they
 * are processed does not matter. Nor is there partial overlap between
 * registers.
 */
/*
 * XOR together the clmul_8x4_even() and clmul_8x4_odd() products of @n and
 * @m, then XOR in the accumulator @a.
 */
static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_8x4_even(n, m);
    const uint64_t odd = clmul_8x4_odd(n, m);

    return even ^ odd ^ a;
}
1702d8bc681SRichard Henderson
HELPER(gvec_vgfm8)1712d8bc681SRichard Henderson void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d)
1722d8bc681SRichard Henderson {
1732d8bc681SRichard Henderson uint64_t *q1 = v1;
1742d8bc681SRichard Henderson const uint64_t *q2 = v2, *q3 = v3;
1752d8bc681SRichard Henderson
1762d8bc681SRichard Henderson q1[0] = do_gfma8(q2[0], q3[0], 0);
1772d8bc681SRichard Henderson q1[1] = do_gfma8(q2[1], q3[1], 0);
1782d8bc681SRichard Henderson }
1792d8bc681SRichard Henderson
HELPER(gvec_vgfma8)1802d8bc681SRichard Henderson void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3,
1812d8bc681SRichard Henderson const void *v4, uint32_t desc)
1822d8bc681SRichard Henderson {
1832d8bc681SRichard Henderson uint64_t *q1 = v1;
1842d8bc681SRichard Henderson const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
1852d8bc681SRichard Henderson
1862d8bc681SRichard Henderson q1[0] = do_gfma8(q2[0], q3[0], q4[0]);
1872d8bc681SRichard Henderson q1[1] = do_gfma8(q2[1], q3[1], q4[1]);
1882d8bc681SRichard Henderson }
1892d8bc681SRichard Henderson
/*
 * XOR together the clmul_16x2_even() and clmul_16x2_odd() products of @n
 * and @m, then XOR in the accumulator @a.
 */
static inline uint64_t do_gfma16(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_16x2_even(n, m);
    const uint64_t odd = clmul_16x2_odd(n, m);

    return even ^ odd ^ a;
}
19425c304e9SRichard Henderson
HELPER(gvec_vgfm16)19525c304e9SRichard Henderson void HELPER(gvec_vgfm16)(void *v1, const void *v2, const void *v3, uint32_t d)
19625c304e9SRichard Henderson {
19725c304e9SRichard Henderson uint64_t *q1 = v1;
19825c304e9SRichard Henderson const uint64_t *q2 = v2, *q3 = v3;
19925c304e9SRichard Henderson
20025c304e9SRichard Henderson q1[0] = do_gfma16(q2[0], q3[0], 0);
20125c304e9SRichard Henderson q1[1] = do_gfma16(q2[1], q3[1], 0);
20225c304e9SRichard Henderson }
20325c304e9SRichard Henderson
HELPER(gvec_vgfma16)20425c304e9SRichard Henderson void HELPER(gvec_vgfma16)(void *v1, const void *v2, const void *v3,
20525c304e9SRichard Henderson const void *v4, uint32_t d)
20625c304e9SRichard Henderson {
20725c304e9SRichard Henderson uint64_t *q1 = v1;
20825c304e9SRichard Henderson const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
20925c304e9SRichard Henderson
21025c304e9SRichard Henderson q1[0] = do_gfma16(q2[0], q3[0], q4[0]);
21125c304e9SRichard Henderson q1[1] = do_gfma16(q2[1], q3[1], q4[1]);
21225c304e9SRichard Henderson }
21325c304e9SRichard Henderson
/*
 * XOR together clmul_32() of @n and @m as passed and clmul_32() of their
 * upper 32 bits, then XOR in the accumulator @a.
 */
static inline uint64_t do_gfma32(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t low_prod = clmul_32(n, m);
    const uint64_t high_prod = clmul_32(n >> 32, m >> 32);

    return low_prod ^ high_prod ^ a;
}
218653aab27SRichard Henderson
HELPER(gvec_vgfm32)219653aab27SRichard Henderson void HELPER(gvec_vgfm32)(void *v1, const void *v2, const void *v3, uint32_t d)
220653aab27SRichard Henderson {
221653aab27SRichard Henderson uint64_t *q1 = v1;
222653aab27SRichard Henderson const uint64_t *q2 = v2, *q3 = v3;
223653aab27SRichard Henderson
224653aab27SRichard Henderson q1[0] = do_gfma32(q2[0], q3[0], 0);
225653aab27SRichard Henderson q1[1] = do_gfma32(q2[1], q3[1], 0);
226653aab27SRichard Henderson }
227653aab27SRichard Henderson
HELPER(gvec_vgfma32)228653aab27SRichard Henderson void HELPER(gvec_vgfma32)(void *v1, const void *v2, const void *v3,
229653aab27SRichard Henderson const void *v4, uint32_t d)
230653aab27SRichard Henderson {
231653aab27SRichard Henderson uint64_t *q1 = v1;
232653aab27SRichard Henderson const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
233653aab27SRichard Henderson
234653aab27SRichard Henderson q1[0] = do_gfma32(q2[0], q3[0], q4[0]);
235653aab27SRichard Henderson q1[1] = do_gfma32(q2[1], q3[1], q4[1]);
236653aab27SRichard Henderson }
237697a45d6SDavid Hildenbrand
HELPER(gvec_vgfm64)238697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
239697a45d6SDavid Hildenbrand uint32_t desc)
240697a45d6SDavid Hildenbrand {
241*ef73fe7cSRichard Henderson uint64_t *q1 = v1;
242*ef73fe7cSRichard Henderson const uint64_t *q2 = v2, *q3 = v3;
243*ef73fe7cSRichard Henderson Int128 r;
244697a45d6SDavid Hildenbrand
245*ef73fe7cSRichard Henderson r = int128_xor(clmul_64(q2[0], q3[0]), clmul_64(q2[1], q3[1]));
246*ef73fe7cSRichard Henderson q1[0] = int128_gethi(r);
247*ef73fe7cSRichard Henderson q1[1] = int128_getlo(r);
248697a45d6SDavid Hildenbrand }
249697a45d6SDavid Hildenbrand
HELPER(gvec_vgfma64)250697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
251697a45d6SDavid Hildenbrand const void *v4, uint32_t desc)
252697a45d6SDavid Hildenbrand {
253*ef73fe7cSRichard Henderson uint64_t *q1 = v1;
254*ef73fe7cSRichard Henderson const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
255*ef73fe7cSRichard Henderson Int128 r;
256697a45d6SDavid Hildenbrand
257*ef73fe7cSRichard Henderson r = int128_xor(clmul_64(q2[0], q3[0]), clmul_64(q2[1], q3[1]));
258*ef73fe7cSRichard Henderson q1[0] = q4[0] ^ int128_gethi(r);
259*ef73fe7cSRichard Henderson q1[1] = q4[1] ^ int128_getlo(r);
260697a45d6SDavid Hildenbrand }
2611b430aecSDavid Hildenbrand
2621b430aecSDavid Hildenbrand #define DEF_VMAL(BITS) \
2631b430aecSDavid Hildenbrand void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \
2641b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \
2651b430aecSDavid Hildenbrand { \
2661b430aecSDavid Hildenbrand int i; \
2671b430aecSDavid Hildenbrand \
2681b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \
2691b430aecSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
2701b430aecSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
2711b430aecSDavid Hildenbrand const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \
2721b430aecSDavid Hildenbrand \
2731b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a * b + c); \
2741b430aecSDavid Hildenbrand } \
2751b430aecSDavid Hildenbrand }
2761b430aecSDavid Hildenbrand DEF_VMAL(8)
2771b430aecSDavid Hildenbrand DEF_VMAL(16)
2781b430aecSDavid Hildenbrand
2791b430aecSDavid Hildenbrand #define DEF_VMAH(BITS) \
2801b430aecSDavid Hildenbrand void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \
2811b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \
2821b430aecSDavid Hildenbrand { \
2831b430aecSDavid Hildenbrand int i; \
2841b430aecSDavid Hildenbrand \
2851b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \
2861b430aecSDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
2871b430aecSDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
2881b430aecSDavid Hildenbrand const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \
2891b430aecSDavid Hildenbrand \
2901b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \
2911b430aecSDavid Hildenbrand } \
2921b430aecSDavid Hildenbrand }
2931b430aecSDavid Hildenbrand DEF_VMAH(8)
2941b430aecSDavid Hildenbrand DEF_VMAH(16)
2951b430aecSDavid Hildenbrand
2961b430aecSDavid Hildenbrand #define DEF_VMALH(BITS) \
2971b430aecSDavid Hildenbrand void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \
2981b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \
2991b430aecSDavid Hildenbrand { \
3001b430aecSDavid Hildenbrand int i; \
3011b430aecSDavid Hildenbrand \
3021b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \
3031b430aecSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
3041b430aecSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
3051b430aecSDavid Hildenbrand const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \
3061b430aecSDavid Hildenbrand \
3071b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \
3081b430aecSDavid Hildenbrand } \
3091b430aecSDavid Hildenbrand }
3101b430aecSDavid Hildenbrand DEF_VMALH(8)
3111b430aecSDavid Hildenbrand DEF_VMALH(16)
3121b430aecSDavid Hildenbrand
3131b430aecSDavid Hildenbrand #define DEF_VMAE(BITS, TBITS) \
3141b430aecSDavid Hildenbrand void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \
3151b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \
3161b430aecSDavid Hildenbrand { \
3171b430aecSDavid Hildenbrand int i, j; \
3181b430aecSDavid Hildenbrand \
3191b430aecSDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
3201b430aecSDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
3211b430aecSDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
3228b952519SDavid Hildenbrand int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
3231b430aecSDavid Hildenbrand \
3241b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \
3251b430aecSDavid Hildenbrand } \
3261b430aecSDavid Hildenbrand }
3271b430aecSDavid Hildenbrand DEF_VMAE(8, 16)
3281b430aecSDavid Hildenbrand DEF_VMAE(16, 32)
3291b430aecSDavid Hildenbrand DEF_VMAE(32, 64)
3301b430aecSDavid Hildenbrand
3311b430aecSDavid Hildenbrand #define DEF_VMALE(BITS, TBITS) \
3321b430aecSDavid Hildenbrand void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \
3331b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \
3341b430aecSDavid Hildenbrand { \
3351b430aecSDavid Hildenbrand int i, j; \
3361b430aecSDavid Hildenbrand \
3371b430aecSDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
3381b430aecSDavid Hildenbrand uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
3391b430aecSDavid Hildenbrand uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
3408b952519SDavid Hildenbrand uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
3411b430aecSDavid Hildenbrand \
3421b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \
3431b430aecSDavid Hildenbrand } \
3441b430aecSDavid Hildenbrand }
3451b430aecSDavid Hildenbrand DEF_VMALE(8, 16)
3461b430aecSDavid Hildenbrand DEF_VMALE(16, 32)
3471b430aecSDavid Hildenbrand DEF_VMALE(32, 64)
3481b430aecSDavid Hildenbrand
3491b430aecSDavid Hildenbrand #define DEF_VMAO(BITS, TBITS) \
3501b430aecSDavid Hildenbrand void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \
3511b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \
3521b430aecSDavid Hildenbrand { \
3531b430aecSDavid Hildenbrand int i, j; \
3541b430aecSDavid Hildenbrand \
3551b430aecSDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
3561b430aecSDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
3571b430aecSDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
3588b952519SDavid Hildenbrand int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
3591b430aecSDavid Hildenbrand \
3601b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \
3611b430aecSDavid Hildenbrand } \
3621b430aecSDavid Hildenbrand }
3631b430aecSDavid Hildenbrand DEF_VMAO(8, 16)
3641b430aecSDavid Hildenbrand DEF_VMAO(16, 32)
3651b430aecSDavid Hildenbrand DEF_VMAO(32, 64)
3661b430aecSDavid Hildenbrand
3671b430aecSDavid Hildenbrand #define DEF_VMALO(BITS, TBITS) \
3681b430aecSDavid Hildenbrand void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \
3691b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \
3701b430aecSDavid Hildenbrand { \
3711b430aecSDavid Hildenbrand int i, j; \
3721b430aecSDavid Hildenbrand \
3731b430aecSDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
3741b430aecSDavid Hildenbrand uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
3751b430aecSDavid Hildenbrand uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
3768b952519SDavid Hildenbrand uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
3771b430aecSDavid Hildenbrand \
3781b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \
3791b430aecSDavid Hildenbrand } \
3801b430aecSDavid Hildenbrand }
3811b430aecSDavid Hildenbrand DEF_VMALO(8, 16)
3821b430aecSDavid Hildenbrand DEF_VMALO(16, 32)
3831b430aecSDavid Hildenbrand DEF_VMALO(32, 64)
3842bf3ee38SDavid Hildenbrand
3852bf3ee38SDavid Hildenbrand #define DEF_VMH(BITS) \
3862bf3ee38SDavid Hildenbrand void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \
3872bf3ee38SDavid Hildenbrand uint32_t desc) \
3882bf3ee38SDavid Hildenbrand { \
3892bf3ee38SDavid Hildenbrand int i; \
3902bf3ee38SDavid Hildenbrand \
3912bf3ee38SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \
3922bf3ee38SDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
3932bf3ee38SDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
3942bf3ee38SDavid Hildenbrand \
3952bf3ee38SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \
3962bf3ee38SDavid Hildenbrand } \
3972bf3ee38SDavid Hildenbrand }
3982bf3ee38SDavid Hildenbrand DEF_VMH(8)
3992bf3ee38SDavid Hildenbrand DEF_VMH(16)
4002bf3ee38SDavid Hildenbrand
4012bf3ee38SDavid Hildenbrand #define DEF_VMLH(BITS) \
4022bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \
4032bf3ee38SDavid Hildenbrand uint32_t desc) \
4042bf3ee38SDavid Hildenbrand { \
4052bf3ee38SDavid Hildenbrand int i; \
4062bf3ee38SDavid Hildenbrand \
4072bf3ee38SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \
4082bf3ee38SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
4092bf3ee38SDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
4102bf3ee38SDavid Hildenbrand \
4112bf3ee38SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \
4122bf3ee38SDavid Hildenbrand } \
4132bf3ee38SDavid Hildenbrand }
4142bf3ee38SDavid Hildenbrand DEF_VMLH(8)
4152bf3ee38SDavid Hildenbrand DEF_VMLH(16)
4162bf3ee38SDavid Hildenbrand
4172bf3ee38SDavid Hildenbrand #define DEF_VME(BITS, TBITS) \
4182bf3ee38SDavid Hildenbrand void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \
4192bf3ee38SDavid Hildenbrand uint32_t desc) \
4202bf3ee38SDavid Hildenbrand { \
4212bf3ee38SDavid Hildenbrand int i, j; \
4222bf3ee38SDavid Hildenbrand \
4232bf3ee38SDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
4242bf3ee38SDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
4252bf3ee38SDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
4262bf3ee38SDavid Hildenbrand \
4272bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \
4282bf3ee38SDavid Hildenbrand } \
4292bf3ee38SDavid Hildenbrand }
4302bf3ee38SDavid Hildenbrand DEF_VME(8, 16)
4312bf3ee38SDavid Hildenbrand DEF_VME(16, 32)
4322bf3ee38SDavid Hildenbrand DEF_VME(32, 64)
4332bf3ee38SDavid Hildenbrand
4342bf3ee38SDavid Hildenbrand #define DEF_VMLE(BITS, TBITS) \
4352bf3ee38SDavid Hildenbrand void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \
4362bf3ee38SDavid Hildenbrand uint32_t desc) \
4372bf3ee38SDavid Hildenbrand { \
4382bf3ee38SDavid Hildenbrand int i, j; \
4392bf3ee38SDavid Hildenbrand \
4402bf3ee38SDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
4412bf3ee38SDavid Hildenbrand const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
4422bf3ee38SDavid Hildenbrand const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
4432bf3ee38SDavid Hildenbrand \
4442bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \
4452bf3ee38SDavid Hildenbrand } \
4462bf3ee38SDavid Hildenbrand }
4472bf3ee38SDavid Hildenbrand DEF_VMLE(8, 16)
4482bf3ee38SDavid Hildenbrand DEF_VMLE(16, 32)
4492bf3ee38SDavid Hildenbrand DEF_VMLE(32, 64)
4502bf3ee38SDavid Hildenbrand
4512bf3ee38SDavid Hildenbrand #define DEF_VMO(BITS, TBITS) \
4522bf3ee38SDavid Hildenbrand void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \
4532bf3ee38SDavid Hildenbrand uint32_t desc) \
4542bf3ee38SDavid Hildenbrand { \
4552bf3ee38SDavid Hildenbrand int i, j; \
4562bf3ee38SDavid Hildenbrand \
4572bf3ee38SDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
4582bf3ee38SDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
4592bf3ee38SDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
4602bf3ee38SDavid Hildenbrand \
4612bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \
4622bf3ee38SDavid Hildenbrand } \
4632bf3ee38SDavid Hildenbrand }
4642bf3ee38SDavid Hildenbrand DEF_VMO(8, 16)
4652bf3ee38SDavid Hildenbrand DEF_VMO(16, 32)
4662bf3ee38SDavid Hildenbrand DEF_VMO(32, 64)
4672bf3ee38SDavid Hildenbrand
4682bf3ee38SDavid Hildenbrand #define DEF_VMLO(BITS, TBITS) \
4692bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \
4702bf3ee38SDavid Hildenbrand uint32_t desc) \
4712bf3ee38SDavid Hildenbrand { \
4722bf3ee38SDavid Hildenbrand int i, j; \
4732bf3ee38SDavid Hildenbrand \
47449a7ce4eSDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
4752bf3ee38SDavid Hildenbrand const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
4762bf3ee38SDavid Hildenbrand const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
4772bf3ee38SDavid Hildenbrand \
4782bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \
4792bf3ee38SDavid Hildenbrand } \
4802bf3ee38SDavid Hildenbrand }
4812bf3ee38SDavid Hildenbrand DEF_VMLO(8, 16)
4822bf3ee38SDavid Hildenbrand DEF_VMLO(16, 32)
4832bf3ee38SDavid Hildenbrand DEF_VMLO(32, 64)
484c3838aaaSDavid Hildenbrand
485c3838aaaSDavid Hildenbrand #define DEF_VPOPCT(BITS) \
486c3838aaaSDavid Hildenbrand void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \
487c3838aaaSDavid Hildenbrand { \
488c3838aaaSDavid Hildenbrand int i; \
489c3838aaaSDavid Hildenbrand \
490c3838aaaSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \
491c3838aaaSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
492c3838aaaSDavid Hildenbrand \
493c3838aaaSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, ctpop32(a)); \
494c3838aaaSDavid Hildenbrand } \
495c3838aaaSDavid Hildenbrand }
496c3838aaaSDavid Hildenbrand DEF_VPOPCT(8)
497c3838aaaSDavid Hildenbrand DEF_VPOPCT(16)
49855236da2SDavid Hildenbrand
4995c4b0ab4SDavid Hildenbrand #define DEF_VERIM(BITS) \
5005c4b0ab4SDavid Hildenbrand void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \
5015c4b0ab4SDavid Hildenbrand uint32_t desc) \
5025c4b0ab4SDavid Hildenbrand { \
5035c4b0ab4SDavid Hildenbrand const uint8_t count = simd_data(desc); \
5045c4b0ab4SDavid Hildenbrand int i; \
5055c4b0ab4SDavid Hildenbrand \
5065c4b0ab4SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \
5075c4b0ab4SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \
5085c4b0ab4SDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i); \
5095c4b0ab4SDavid Hildenbrand const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \
5105c4b0ab4SDavid Hildenbrand const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \
5115c4b0ab4SDavid Hildenbrand \
5125c4b0ab4SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, d); \
5135c4b0ab4SDavid Hildenbrand } \
5145c4b0ab4SDavid Hildenbrand }
5155c4b0ab4SDavid Hildenbrand DEF_VERIM(8)
5165c4b0ab4SDavid Hildenbrand DEF_VERIM(16)
517dea33fc3SDavid Hildenbrand
HELPER(gvec_vsl)518dea33fc3SDavid Hildenbrand void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count,
519dea33fc3SDavid Hildenbrand uint32_t desc)
520dea33fc3SDavid Hildenbrand {
521dea33fc3SDavid Hildenbrand s390_vec_shl(v1, v2, count);
522dea33fc3SDavid Hildenbrand }
5235f724887SDavid Hildenbrand
HELPER(gvec_vsl_ve2)524b7a50eb7SDavid Miller void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3,
525b7a50eb7SDavid Miller uint32_t desc)
526b7a50eb7SDavid Miller {
527b7a50eb7SDavid Miller S390Vector tmp;
528b7a50eb7SDavid Miller uint32_t sh, e0, e1 = 0;
529b7a50eb7SDavid Miller int i;
530b7a50eb7SDavid Miller
531b7a50eb7SDavid Miller for (i = 15; i >= 0; --i, e1 = e0) {
532b7a50eb7SDavid Miller e0 = s390_vec_read_element8(v2, i);
533b7a50eb7SDavid Miller sh = s390_vec_read_element8(v3, i) & 7;
534b7a50eb7SDavid Miller
535b7a50eb7SDavid Miller s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh));
536b7a50eb7SDavid Miller }
537b7a50eb7SDavid Miller
538b7a50eb7SDavid Miller *(S390Vector *)v1 = tmp;
539b7a50eb7SDavid Miller }
540b7a50eb7SDavid Miller
HELPER(gvec_vsra)5415f724887SDavid Hildenbrand void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count,
5425f724887SDavid Hildenbrand uint32_t desc)
5435f724887SDavid Hildenbrand {
5445f724887SDavid Hildenbrand s390_vec_sar(v1, v2, count);
5455f724887SDavid Hildenbrand }
5468112274fSDavid Hildenbrand
HELPER(gvec_vsra_ve2)547b7a50eb7SDavid Miller void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3,
548b7a50eb7SDavid Miller uint32_t desc)
549b7a50eb7SDavid Miller {
550b7a50eb7SDavid Miller S390Vector tmp;
551b7a50eb7SDavid Miller uint32_t sh, e0, e1 = 0;
552b7a50eb7SDavid Miller int i = 0;
553b7a50eb7SDavid Miller
554b7a50eb7SDavid Miller /* Byte 0 is special only. */
555b7a50eb7SDavid Miller e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i);
556b7a50eb7SDavid Miller sh = s390_vec_read_element8(v3, i) & 7;
557b7a50eb7SDavid Miller s390_vec_write_element8(&tmp, i, e0 >> sh);
558b7a50eb7SDavid Miller
559b7a50eb7SDavid Miller e1 = e0;
560b7a50eb7SDavid Miller for (i = 1; i < 16; ++i, e1 = e0) {
561b7a50eb7SDavid Miller e0 = s390_vec_read_element8(v2, i);
562b7a50eb7SDavid Miller sh = s390_vec_read_element8(v3, i) & 7;
563b7a50eb7SDavid Miller s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh);
564b7a50eb7SDavid Miller }
565b7a50eb7SDavid Miller
566b7a50eb7SDavid Miller *(S390Vector *)v1 = tmp;
567b7a50eb7SDavid Miller }
568b7a50eb7SDavid Miller
HELPER(gvec_vsrl)5698112274fSDavid Hildenbrand void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count,
5708112274fSDavid Hildenbrand uint32_t desc)
5718112274fSDavid Hildenbrand {
5728112274fSDavid Hildenbrand s390_vec_shr(v1, v2, count);
5738112274fSDavid Hildenbrand }
5741ee2d7baSDavid Hildenbrand
HELPER(gvec_vsrl_ve2)575b7a50eb7SDavid Miller void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3,
576b7a50eb7SDavid Miller uint32_t desc)
577b7a50eb7SDavid Miller {
578b7a50eb7SDavid Miller S390Vector tmp;
579b7a50eb7SDavid Miller uint32_t sh, e0, e1 = 0;
580b7a50eb7SDavid Miller
581b7a50eb7SDavid Miller for (int i = 0; i < 16; ++i, e1 = e0) {
582b7a50eb7SDavid Miller e0 = s390_vec_read_element8(v2, i);
583b7a50eb7SDavid Miller sh = s390_vec_read_element8(v3, i) & 7;
584b7a50eb7SDavid Miller
585b7a50eb7SDavid Miller s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh);
586b7a50eb7SDavid Miller }
587b7a50eb7SDavid Miller
588b7a50eb7SDavid Miller *(S390Vector *)v1 = tmp;
589b7a50eb7SDavid Miller }
590b7a50eb7SDavid Miller
5911ee2d7baSDavid Hildenbrand #define DEF_VSCBI(BITS) \
5921ee2d7baSDavid Hildenbrand void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \
5931ee2d7baSDavid Hildenbrand uint32_t desc) \
5941ee2d7baSDavid Hildenbrand { \
5951ee2d7baSDavid Hildenbrand int i; \
5961ee2d7baSDavid Hildenbrand \
5971ee2d7baSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \
5981ee2d7baSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
5991ee2d7baSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
6001ee2d7baSDavid Hildenbrand \
60123e79774SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a >= b); \
6021ee2d7baSDavid Hildenbrand } \
6031ee2d7baSDavid Hildenbrand }
6041ee2d7baSDavid Hildenbrand DEF_VSCBI(8)
6051ee2d7baSDavid Hildenbrand DEF_VSCBI(16)
606db156ebfSDavid Hildenbrand
HELPER(gvec_vtm)607db156ebfSDavid Hildenbrand void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env,
608db156ebfSDavid Hildenbrand uint32_t desc)
609db156ebfSDavid Hildenbrand {
610db156ebfSDavid Hildenbrand S390Vector tmp;
611db156ebfSDavid Hildenbrand
612db156ebfSDavid Hildenbrand s390_vec_and(&tmp, v1, v2);
613db156ebfSDavid Hildenbrand if (s390_vec_is_zero(&tmp)) {
614db156ebfSDavid Hildenbrand /* Selected bits all zeros; or all mask bits zero */
615db156ebfSDavid Hildenbrand env->cc_op = 0;
616db156ebfSDavid Hildenbrand } else if (s390_vec_equal(&tmp, v2)) {
617db156ebfSDavid Hildenbrand /* Selected bits all ones */
618db156ebfSDavid Hildenbrand env->cc_op = 3;
619db156ebfSDavid Hildenbrand } else {
620db156ebfSDavid Hildenbrand /* Selected bits a mix of zeros and ones */
621db156ebfSDavid Hildenbrand env->cc_op = 1;
622db156ebfSDavid Hildenbrand }
623db156ebfSDavid Hildenbrand }
624