1c1a81d4bSDavid Hildenbrand /* 2c1a81d4bSDavid Hildenbrand * QEMU TCG support -- s390x vector integer instruction support 3c1a81d4bSDavid Hildenbrand * 4c1a81d4bSDavid Hildenbrand * Copyright (C) 2019 Red Hat Inc 5c1a81d4bSDavid Hildenbrand * 6c1a81d4bSDavid Hildenbrand * Authors: 7c1a81d4bSDavid Hildenbrand * David Hildenbrand <david@redhat.com> 8c1a81d4bSDavid Hildenbrand * 9c1a81d4bSDavid Hildenbrand * This work is licensed under the terms of the GNU GPL, version 2 or later. 10c1a81d4bSDavid Hildenbrand * See the COPYING file in the top-level directory. 11c1a81d4bSDavid Hildenbrand */ 12c1a81d4bSDavid Hildenbrand #include "qemu/osdep.h" 13c1a81d4bSDavid Hildenbrand #include "cpu.h" 14c1a81d4bSDavid Hildenbrand #include "vec.h" 15c1a81d4bSDavid Hildenbrand #include "exec/helper-proto.h" 165c4b0ab4SDavid Hildenbrand #include "tcg/tcg-gvec-desc.h" 172d8bc681SRichard Henderson #include "crypto/clmul.h" 18c1a81d4bSDavid Hildenbrand 19697a45d6SDavid Hildenbrand static bool s390_vec_is_zero(const S390Vector *v) 20697a45d6SDavid Hildenbrand { 21697a45d6SDavid Hildenbrand return !v->doubleword[0] && !v->doubleword[1]; 22697a45d6SDavid Hildenbrand } 23697a45d6SDavid Hildenbrand 24697a45d6SDavid Hildenbrand static void s390_vec_xor(S390Vector *res, const S390Vector *a, 25697a45d6SDavid Hildenbrand const S390Vector *b) 26697a45d6SDavid Hildenbrand { 27697a45d6SDavid Hildenbrand res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0]; 28697a45d6SDavid Hildenbrand res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1]; 29697a45d6SDavid Hildenbrand } 30697a45d6SDavid Hildenbrand 31db156ebfSDavid Hildenbrand static void s390_vec_and(S390Vector *res, const S390Vector *a, 32db156ebfSDavid Hildenbrand const S390Vector *b) 33db156ebfSDavid Hildenbrand { 34db156ebfSDavid Hildenbrand res->doubleword[0] = a->doubleword[0] & b->doubleword[0]; 35db156ebfSDavid Hildenbrand res->doubleword[1] = a->doubleword[1] & b->doubleword[1]; 36db156ebfSDavid Hildenbrand } 37db156ebfSDavid Hildenbrand 38db156ebfSDavid Hildenbrand static bool s390_vec_equal(const S390Vector *a, const S390Vector *b) 39db156ebfSDavid Hildenbrand { 40db156ebfSDavid Hildenbrand return a->doubleword[0] == b->doubleword[0] && 41db156ebfSDavid Hildenbrand a->doubleword[1] == b->doubleword[1]; 42db156ebfSDavid Hildenbrand } 43db156ebfSDavid Hildenbrand 44697a45d6SDavid Hildenbrand static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count) 45697a45d6SDavid Hildenbrand { 46697a45d6SDavid Hildenbrand uint64_t tmp; 47697a45d6SDavid Hildenbrand 48697a45d6SDavid Hildenbrand g_assert(count < 128); 49697a45d6SDavid Hildenbrand if (count == 0) { 50697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[0]; 51697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[1]; 52697a45d6SDavid Hildenbrand } else if (count == 64) { 53697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[1]; 54697a45d6SDavid Hildenbrand d->doubleword[1] = 0; 55697a45d6SDavid Hildenbrand } else if (count < 64) { 56697a45d6SDavid Hildenbrand tmp = extract64(a->doubleword[1], 64 - count, count); 57697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[1] << count; 58697a45d6SDavid Hildenbrand d->doubleword[0] = (a->doubleword[0] << count) | tmp; 59697a45d6SDavid Hildenbrand } else { 60697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[1] << (count - 64); 61697a45d6SDavid Hildenbrand d->doubleword[1] = 0; 62697a45d6SDavid Hildenbrand } 63697a45d6SDavid Hildenbrand } 64697a45d6SDavid Hildenbrand 655f724887SDavid Hildenbrand static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count) 665f724887SDavid Hildenbrand { 675f724887SDavid Hildenbrand uint64_t tmp; 685f724887SDavid Hildenbrand 695f724887SDavid Hildenbrand if (count == 0) { 705f724887SDavid Hildenbrand d->doubleword[0] = a->doubleword[0]; 715f724887SDavid Hildenbrand d->doubleword[1] = a->doubleword[1]; 725f724887SDavid Hildenbrand } else if (count == 64) { 73b57b3368SDavid Hildenbrand tmp = (int64_t)a->doubleword[0] >> 63; 745f724887SDavid Hildenbrand d->doubleword[1] = a->doubleword[0]; 75b57b3368SDavid Hildenbrand d->doubleword[0] = tmp; 765f724887SDavid Hildenbrand } else if (count < 64) { 775f724887SDavid Hildenbrand tmp = a->doubleword[1] >> count; 785f724887SDavid Hildenbrand d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 795f724887SDavid Hildenbrand d->doubleword[0] = (int64_t)a->doubleword[0] >> count; 805f724887SDavid Hildenbrand } else { 81b57b3368SDavid Hildenbrand tmp = (int64_t)a->doubleword[0] >> 63; 825f724887SDavid Hildenbrand d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64); 83b57b3368SDavid Hildenbrand d->doubleword[0] = tmp; 845f724887SDavid Hildenbrand } 855f724887SDavid Hildenbrand } 865f724887SDavid Hildenbrand 87697a45d6SDavid Hildenbrand static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count) 88697a45d6SDavid Hildenbrand { 89697a45d6SDavid Hildenbrand uint64_t tmp; 90697a45d6SDavid Hildenbrand 91697a45d6SDavid Hildenbrand g_assert(count < 128); 92697a45d6SDavid Hildenbrand if (count == 0) { 93697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[0]; 94697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[1]; 95697a45d6SDavid Hildenbrand } else if (count == 64) { 96697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[0]; 97697a45d6SDavid Hildenbrand d->doubleword[0] = 0; 98697a45d6SDavid Hildenbrand } else if (count < 64) { 99697a45d6SDavid Hildenbrand tmp = a->doubleword[1] >> count; 100697a45d6SDavid Hildenbrand d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 101697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[0] >> count; 102697a45d6SDavid Hildenbrand } else { 103697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[0] >> (count - 64); 104697a45d6SDavid Hildenbrand d->doubleword[0] = 0; 105697a45d6SDavid Hildenbrand } 106697a45d6SDavid Hildenbrand } 107c1a81d4bSDavid Hildenbrand #define DEF_VAVG(BITS) \ 108c1a81d4bSDavid Hildenbrand void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \ 109c1a81d4bSDavid Hildenbrand uint32_t desc) \ 110c1a81d4bSDavid Hildenbrand { \ 111c1a81d4bSDavid Hildenbrand int i; \ 112c1a81d4bSDavid Hildenbrand \ 113c1a81d4bSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 114c1a81d4bSDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 115c1a81d4bSDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 116c1a81d4bSDavid Hildenbrand \ 117c1a81d4bSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 118c1a81d4bSDavid Hildenbrand } \ 119c1a81d4bSDavid Hildenbrand } 120c1a81d4bSDavid Hildenbrand DEF_VAVG(8) 121c1a81d4bSDavid Hildenbrand DEF_VAVG(16) 122801aa78bSDavid Hildenbrand 123801aa78bSDavid Hildenbrand #define DEF_VAVGL(BITS) \ 124801aa78bSDavid Hildenbrand void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \ 125801aa78bSDavid Hildenbrand uint32_t desc) \ 126801aa78bSDavid Hildenbrand { \ 127801aa78bSDavid Hildenbrand int i; \ 128801aa78bSDavid Hildenbrand \ 129801aa78bSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 130801aa78bSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 131801aa78bSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 132801aa78bSDavid Hildenbrand \ 133801aa78bSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 134801aa78bSDavid Hildenbrand } \ 135801aa78bSDavid Hildenbrand } 136801aa78bSDavid Hildenbrand DEF_VAVGL(8) 137801aa78bSDavid Hildenbrand DEF_VAVGL(16) 13828863f1dSDavid Hildenbrand 13928863f1dSDavid Hildenbrand #define DEF_VCLZ(BITS) \ 14028863f1dSDavid Hildenbrand void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \ 14128863f1dSDavid Hildenbrand { \ 14228863f1dSDavid Hildenbrand int i; \ 14328863f1dSDavid Hildenbrand \ 14428863f1dSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 14528863f1dSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 14628863f1dSDavid Hildenbrand \ 14728863f1dSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \ 14828863f1dSDavid Hildenbrand } \ 14928863f1dSDavid Hildenbrand } 15028863f1dSDavid Hildenbrand DEF_VCLZ(8) 15128863f1dSDavid Hildenbrand DEF_VCLZ(16) 152449a8ac2SDavid Hildenbrand 153449a8ac2SDavid Hildenbrand #define DEF_VCTZ(BITS) \ 154449a8ac2SDavid Hildenbrand void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \ 155449a8ac2SDavid Hildenbrand { \ 156449a8ac2SDavid Hildenbrand int i; \ 157449a8ac2SDavid Hildenbrand \ 158449a8ac2SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 159449a8ac2SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 160449a8ac2SDavid Hildenbrand \ 161449a8ac2SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \ 162449a8ac2SDavid Hildenbrand } \ 163449a8ac2SDavid Hildenbrand } 164449a8ac2SDavid Hildenbrand DEF_VCTZ(8) 165449a8ac2SDavid Hildenbrand DEF_VCTZ(16) 166697a45d6SDavid Hildenbrand 167697a45d6SDavid Hildenbrand /* like binary multiplication, but XOR instead of addition */ 168697a45d6SDavid Hildenbrand #define DEF_GALOIS_MULTIPLY(BITS, TBITS) \ 169697a45d6SDavid Hildenbrand static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \ 170697a45d6SDavid Hildenbrand uint##TBITS##_t b) \ 171697a45d6SDavid Hildenbrand { \ 172697a45d6SDavid Hildenbrand uint##TBITS##_t res = 0; \ 173697a45d6SDavid Hildenbrand \ 174697a45d6SDavid Hildenbrand while (b) { \ 175697a45d6SDavid Hildenbrand if (b & 0x1) { \ 176697a45d6SDavid Hildenbrand res = res ^ a; \ 177697a45d6SDavid Hildenbrand } \ 178697a45d6SDavid Hildenbrand a = a << 1; \ 179697a45d6SDavid Hildenbrand b = b >> 1; \ 180697a45d6SDavid Hildenbrand } \ 181697a45d6SDavid Hildenbrand return res; \ 182697a45d6SDavid Hildenbrand } 183697a45d6SDavid Hildenbrand DEF_GALOIS_MULTIPLY(32, 64) 184697a45d6SDavid Hildenbrand 185697a45d6SDavid Hildenbrand static S390Vector galois_multiply64(uint64_t a, uint64_t b) 186697a45d6SDavid Hildenbrand { 187697a45d6SDavid Hildenbrand S390Vector res = {}; 188697a45d6SDavid Hildenbrand S390Vector va = { 189697a45d6SDavid Hildenbrand .doubleword[1] = a, 190697a45d6SDavid Hildenbrand }; 191697a45d6SDavid Hildenbrand S390Vector vb = { 192697a45d6SDavid Hildenbrand .doubleword[1] = b, 193697a45d6SDavid Hildenbrand }; 194697a45d6SDavid Hildenbrand 195697a45d6SDavid Hildenbrand while (!s390_vec_is_zero(&vb)) { 196697a45d6SDavid Hildenbrand if (vb.doubleword[1] & 0x1) { 197697a45d6SDavid Hildenbrand s390_vec_xor(&res, &res, &va); 198697a45d6SDavid Hildenbrand } 199697a45d6SDavid Hildenbrand s390_vec_shl(&va, &va, 1); 200697a45d6SDavid Hildenbrand s390_vec_shr(&vb, &vb, 1); 201697a45d6SDavid Hildenbrand } 202697a45d6SDavid Hildenbrand return res; 203697a45d6SDavid Hildenbrand } 204697a45d6SDavid Hildenbrand 2052d8bc681SRichard Henderson /* 2062d8bc681SRichard Henderson * There is no carry across the two doublewords, so their order does 2072d8bc681SRichard Henderson * not matter. Nor is there partial overlap between registers. 2082d8bc681SRichard Henderson */ 2092d8bc681SRichard Henderson static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a) 2102d8bc681SRichard Henderson { 2112d8bc681SRichard Henderson return clmul_8x4_even(n, m) ^ clmul_8x4_odd(n, m) ^ a; 2122d8bc681SRichard Henderson } 2132d8bc681SRichard Henderson 2142d8bc681SRichard Henderson void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d) 2152d8bc681SRichard Henderson { 2162d8bc681SRichard Henderson uint64_t *q1 = v1; 2172d8bc681SRichard Henderson const uint64_t *q2 = v2, *q3 = v3; 2182d8bc681SRichard Henderson 2192d8bc681SRichard Henderson q1[0] = do_gfma8(q2[0], q3[0], 0); 2202d8bc681SRichard Henderson q1[1] = do_gfma8(q2[1], q3[1], 0); 2212d8bc681SRichard Henderson } 2222d8bc681SRichard Henderson 2232d8bc681SRichard Henderson void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3, 2242d8bc681SRichard Henderson const void *v4, uint32_t desc) 2252d8bc681SRichard Henderson { 2262d8bc681SRichard Henderson uint64_t *q1 = v1; 2272d8bc681SRichard Henderson const uint64_t *q2 = v2, *q3 = v3, *q4 = v4; 2282d8bc681SRichard Henderson 2292d8bc681SRichard Henderson q1[0] = do_gfma8(q2[0], q3[0], q4[0]); 2302d8bc681SRichard Henderson q1[1] = do_gfma8(q2[1], q3[1], q4[1]); 2312d8bc681SRichard Henderson } 2322d8bc681SRichard Henderson 233*25c304e9SRichard Henderson static inline uint64_t do_gfma16(uint64_t n, uint64_t m, uint64_t a) 234*25c304e9SRichard Henderson { 235*25c304e9SRichard Henderson return clmul_16x2_even(n, m) ^ clmul_16x2_odd(n, m) ^ a; 236*25c304e9SRichard Henderson } 237*25c304e9SRichard Henderson 238*25c304e9SRichard Henderson void HELPER(gvec_vgfm16)(void *v1, const void *v2, const void *v3, uint32_t d) 239*25c304e9SRichard Henderson { 240*25c304e9SRichard Henderson uint64_t *q1 = v1; 241*25c304e9SRichard Henderson const uint64_t *q2 = v2, *q3 = v3; 242*25c304e9SRichard Henderson 243*25c304e9SRichard Henderson q1[0] = do_gfma16(q2[0], q3[0], 0); 244*25c304e9SRichard Henderson q1[1] = do_gfma16(q2[1], q3[1], 0); 245*25c304e9SRichard Henderson } 246*25c304e9SRichard Henderson 247*25c304e9SRichard Henderson void HELPER(gvec_vgfma16)(void *v1, const void *v2, const void *v3, 248*25c304e9SRichard Henderson const void *v4, uint32_t d) 249*25c304e9SRichard Henderson { 250*25c304e9SRichard Henderson uint64_t *q1 = v1; 251*25c304e9SRichard Henderson const uint64_t *q2 = v2, *q3 = v3, *q4 = v4; 252*25c304e9SRichard Henderson 253*25c304e9SRichard Henderson q1[0] = do_gfma16(q2[0], q3[0], q4[0]); 254*25c304e9SRichard Henderson q1[1] = do_gfma16(q2[1], q3[1], q4[1]); 255*25c304e9SRichard Henderson } 256*25c304e9SRichard Henderson 257697a45d6SDavid Hildenbrand #define DEF_VGFM(BITS, TBITS) \ 258697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \ 259697a45d6SDavid Hildenbrand uint32_t desc) \ 260697a45d6SDavid Hildenbrand { \ 261697a45d6SDavid Hildenbrand int i; \ 262697a45d6SDavid Hildenbrand \ 263697a45d6SDavid Hildenbrand for (i = 0; i < (128 / TBITS); i++) { \ 264697a45d6SDavid Hildenbrand uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 265697a45d6SDavid Hildenbrand uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 266697a45d6SDavid Hildenbrand uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 267697a45d6SDavid Hildenbrand \ 268697a45d6SDavid Hildenbrand a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 269697a45d6SDavid Hildenbrand b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 270697a45d6SDavid Hildenbrand d = d ^ galois_multiply32(a, b); \ 271697a45d6SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, d); \ 272697a45d6SDavid Hildenbrand } \ 273697a45d6SDavid Hildenbrand } 274697a45d6SDavid Hildenbrand DEF_VGFM(32, 64) 275697a45d6SDavid Hildenbrand 276697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3, 277697a45d6SDavid Hildenbrand uint32_t desc) 278697a45d6SDavid Hildenbrand { 279697a45d6SDavid Hildenbrand S390Vector tmp1, tmp2; 280697a45d6SDavid Hildenbrand uint64_t a, b; 281697a45d6SDavid Hildenbrand 282697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 0); 283697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 0); 284697a45d6SDavid Hildenbrand tmp1 = galois_multiply64(a, b); 285697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 1); 286697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 1); 287697a45d6SDavid Hildenbrand tmp2 = galois_multiply64(a, b); 288697a45d6SDavid Hildenbrand s390_vec_xor(v1, &tmp1, &tmp2); 289697a45d6SDavid Hildenbrand } 290697a45d6SDavid Hildenbrand 291697a45d6SDavid Hildenbrand #define DEF_VGFMA(BITS, TBITS) \ 292697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \ 293697a45d6SDavid Hildenbrand const void *v4, uint32_t desc) \ 294697a45d6SDavid Hildenbrand { \ 295697a45d6SDavid Hildenbrand int i; \ 296697a45d6SDavid Hildenbrand \ 297697a45d6SDavid Hildenbrand for (i = 0; i < (128 / TBITS); i++) { \ 298697a45d6SDavid Hildenbrand uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 299697a45d6SDavid Hildenbrand uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 300697a45d6SDavid Hildenbrand uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 301697a45d6SDavid Hildenbrand \ 302697a45d6SDavid Hildenbrand a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 303697a45d6SDavid Hildenbrand b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 304697a45d6SDavid Hildenbrand d = d ^ galois_multiply32(a, b); \ 305697a45d6SDavid Hildenbrand d = d ^ s390_vec_read_element##TBITS(v4, i); \ 306697a45d6SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, d); \ 307697a45d6SDavid Hildenbrand } \ 308697a45d6SDavid Hildenbrand } 309697a45d6SDavid Hildenbrand DEF_VGFMA(32, 64) 310697a45d6SDavid Hildenbrand 311697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3, 312697a45d6SDavid Hildenbrand const void *v4, uint32_t desc) 313697a45d6SDavid Hildenbrand { 314697a45d6SDavid Hildenbrand S390Vector tmp1, tmp2; 315697a45d6SDavid Hildenbrand uint64_t a, b; 316697a45d6SDavid Hildenbrand 317697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 0); 318697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 0); 319697a45d6SDavid Hildenbrand tmp1 = galois_multiply64(a, b); 320697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 1); 321697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 1); 322697a45d6SDavid Hildenbrand tmp2 = galois_multiply64(a, b); 323697a45d6SDavid Hildenbrand s390_vec_xor(&tmp1, &tmp1, &tmp2); 324697a45d6SDavid Hildenbrand s390_vec_xor(v1, &tmp1, v4); 325697a45d6SDavid Hildenbrand } 3261b430aecSDavid Hildenbrand 3271b430aecSDavid Hildenbrand #define DEF_VMAL(BITS) \ 3281b430aecSDavid Hildenbrand void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \ 3291b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3301b430aecSDavid Hildenbrand { \ 3311b430aecSDavid Hildenbrand int i; \ 3321b430aecSDavid Hildenbrand \ 3331b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 3341b430aecSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 3351b430aecSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 3361b430aecSDavid Hildenbrand const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 3371b430aecSDavid Hildenbrand \ 3381b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a * b + c); \ 3391b430aecSDavid Hildenbrand } \ 3401b430aecSDavid Hildenbrand } 3411b430aecSDavid Hildenbrand DEF_VMAL(8) 3421b430aecSDavid Hildenbrand DEF_VMAL(16) 3431b430aecSDavid Hildenbrand 3441b430aecSDavid Hildenbrand #define DEF_VMAH(BITS) \ 3451b430aecSDavid Hildenbrand void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \ 3461b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3471b430aecSDavid Hildenbrand { \ 3481b430aecSDavid Hildenbrand int i; \ 3491b430aecSDavid Hildenbrand \ 3501b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 3511b430aecSDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 3521b430aecSDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 3531b430aecSDavid Hildenbrand const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \ 3541b430aecSDavid Hildenbrand \ 3551b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 3561b430aecSDavid Hildenbrand } \ 3571b430aecSDavid Hildenbrand } 3581b430aecSDavid Hildenbrand DEF_VMAH(8) 3591b430aecSDavid Hildenbrand DEF_VMAH(16) 3601b430aecSDavid Hildenbrand 3611b430aecSDavid Hildenbrand #define DEF_VMALH(BITS) \ 3621b430aecSDavid Hildenbrand void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \ 3631b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3641b430aecSDavid Hildenbrand { \ 3651b430aecSDavid Hildenbrand int i; \ 3661b430aecSDavid Hildenbrand \ 3671b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 3681b430aecSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 3691b430aecSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 3701b430aecSDavid Hildenbrand const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 3711b430aecSDavid Hildenbrand \ 3721b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 3731b430aecSDavid Hildenbrand } \ 3741b430aecSDavid Hildenbrand } 3751b430aecSDavid Hildenbrand DEF_VMALH(8) 3761b430aecSDavid Hildenbrand DEF_VMALH(16) 3771b430aecSDavid Hildenbrand 3781b430aecSDavid Hildenbrand #define DEF_VMAE(BITS, TBITS) \ 3791b430aecSDavid Hildenbrand void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \ 3801b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3811b430aecSDavid Hildenbrand { \ 3821b430aecSDavid Hildenbrand int i, j; \ 3831b430aecSDavid Hildenbrand \ 3841b430aecSDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 3851b430aecSDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 3861b430aecSDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 3878b952519SDavid Hildenbrand int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 3881b430aecSDavid Hildenbrand \ 3891b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 3901b430aecSDavid Hildenbrand } \ 3911b430aecSDavid Hildenbrand } 3921b430aecSDavid Hildenbrand DEF_VMAE(8, 16) 3931b430aecSDavid Hildenbrand DEF_VMAE(16, 32) 3941b430aecSDavid Hildenbrand DEF_VMAE(32, 64) 3951b430aecSDavid Hildenbrand 3961b430aecSDavid Hildenbrand #define DEF_VMALE(BITS, TBITS) \ 3971b430aecSDavid Hildenbrand void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \ 3981b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3991b430aecSDavid Hildenbrand { \ 4001b430aecSDavid Hildenbrand int i, j; \ 4011b430aecSDavid Hildenbrand \ 4021b430aecSDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 4031b430aecSDavid Hildenbrand uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 4041b430aecSDavid Hildenbrand uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 4058b952519SDavid Hildenbrand uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 4061b430aecSDavid Hildenbrand \ 4071b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 4081b430aecSDavid Hildenbrand } \ 4091b430aecSDavid Hildenbrand } 4101b430aecSDavid Hildenbrand DEF_VMALE(8, 16) 4111b430aecSDavid Hildenbrand DEF_VMALE(16, 32) 4121b430aecSDavid Hildenbrand DEF_VMALE(32, 64) 4131b430aecSDavid Hildenbrand 4141b430aecSDavid Hildenbrand #define DEF_VMAO(BITS, TBITS) \ 4151b430aecSDavid Hildenbrand void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \ 4161b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 4171b430aecSDavid Hildenbrand { \ 4181b430aecSDavid Hildenbrand int i, j; \ 4191b430aecSDavid Hildenbrand \ 4201b430aecSDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 4211b430aecSDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 4221b430aecSDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 4238b952519SDavid Hildenbrand int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 4241b430aecSDavid Hildenbrand \ 4251b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 4261b430aecSDavid Hildenbrand } \ 4271b430aecSDavid Hildenbrand } 4281b430aecSDavid Hildenbrand DEF_VMAO(8, 16) 4291b430aecSDavid Hildenbrand DEF_VMAO(16, 32) 4301b430aecSDavid Hildenbrand DEF_VMAO(32, 64) 4311b430aecSDavid Hildenbrand 4321b430aecSDavid Hildenbrand #define DEF_VMALO(BITS, TBITS) \ 4331b430aecSDavid Hildenbrand void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \ 4341b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 4351b430aecSDavid Hildenbrand { \ 4361b430aecSDavid Hildenbrand int i, j; \ 4371b430aecSDavid Hildenbrand \ 4381b430aecSDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 4391b430aecSDavid Hildenbrand uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 4401b430aecSDavid Hildenbrand uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 4418b952519SDavid Hildenbrand uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 4421b430aecSDavid Hildenbrand \ 4431b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 4441b430aecSDavid Hildenbrand } \ 4451b430aecSDavid Hildenbrand } 4461b430aecSDavid Hildenbrand DEF_VMALO(8, 16) 4471b430aecSDavid Hildenbrand DEF_VMALO(16, 32) 4481b430aecSDavid Hildenbrand DEF_VMALO(32, 64) 4492bf3ee38SDavid Hildenbrand 4502bf3ee38SDavid Hildenbrand #define DEF_VMH(BITS) \ 4512bf3ee38SDavid Hildenbrand void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \ 4522bf3ee38SDavid Hildenbrand uint32_t desc) \ 4532bf3ee38SDavid Hildenbrand { \ 4542bf3ee38SDavid Hildenbrand int i; \ 4552bf3ee38SDavid Hildenbrand \ 4562bf3ee38SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 4572bf3ee38SDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 4582bf3ee38SDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 4592bf3ee38SDavid Hildenbrand \ 4602bf3ee38SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 4612bf3ee38SDavid Hildenbrand } \ 4622bf3ee38SDavid Hildenbrand } 4632bf3ee38SDavid Hildenbrand DEF_VMH(8) 4642bf3ee38SDavid Hildenbrand DEF_VMH(16) 4652bf3ee38SDavid Hildenbrand 4662bf3ee38SDavid Hildenbrand #define DEF_VMLH(BITS) \ 4672bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \ 4682bf3ee38SDavid Hildenbrand uint32_t desc) \ 4692bf3ee38SDavid Hildenbrand { \ 4702bf3ee38SDavid Hildenbrand int i; \ 4712bf3ee38SDavid Hildenbrand \ 4722bf3ee38SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 4732bf3ee38SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 4742bf3ee38SDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 4752bf3ee38SDavid Hildenbrand \ 4762bf3ee38SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 4772bf3ee38SDavid Hildenbrand } \ 4782bf3ee38SDavid Hildenbrand } 4792bf3ee38SDavid Hildenbrand DEF_VMLH(8) 4802bf3ee38SDavid Hildenbrand DEF_VMLH(16) 4812bf3ee38SDavid Hildenbrand 4822bf3ee38SDavid Hildenbrand #define DEF_VME(BITS, TBITS) \ 4832bf3ee38SDavid Hildenbrand void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \ 4842bf3ee38SDavid Hildenbrand uint32_t desc) \ 4852bf3ee38SDavid Hildenbrand { \ 4862bf3ee38SDavid Hildenbrand int i, j; \ 4872bf3ee38SDavid Hildenbrand \ 4882bf3ee38SDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 4892bf3ee38SDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 4902bf3ee38SDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 4912bf3ee38SDavid Hildenbrand \ 4922bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 4932bf3ee38SDavid Hildenbrand } \ 4942bf3ee38SDavid Hildenbrand } 4952bf3ee38SDavid Hildenbrand DEF_VME(8, 16) 4962bf3ee38SDavid Hildenbrand DEF_VME(16, 32) 4972bf3ee38SDavid Hildenbrand DEF_VME(32, 64) 4982bf3ee38SDavid Hildenbrand 4992bf3ee38SDavid Hildenbrand #define DEF_VMLE(BITS, TBITS) \ 5002bf3ee38SDavid Hildenbrand void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \ 5012bf3ee38SDavid Hildenbrand uint32_t desc) \ 5022bf3ee38SDavid Hildenbrand { \ 5032bf3ee38SDavid Hildenbrand int i, j; \ 5042bf3ee38SDavid Hildenbrand \ 5052bf3ee38SDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 5062bf3ee38SDavid Hildenbrand const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 5072bf3ee38SDavid Hildenbrand const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 5082bf3ee38SDavid Hildenbrand \ 5092bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 5102bf3ee38SDavid Hildenbrand } \ 5112bf3ee38SDavid Hildenbrand } 5122bf3ee38SDavid Hildenbrand DEF_VMLE(8, 16) 5132bf3ee38SDavid Hildenbrand DEF_VMLE(16, 32) 5142bf3ee38SDavid Hildenbrand DEF_VMLE(32, 64) 5152bf3ee38SDavid Hildenbrand 5162bf3ee38SDavid Hildenbrand #define DEF_VMO(BITS, TBITS) \ 5172bf3ee38SDavid Hildenbrand void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \ 5182bf3ee38SDavid Hildenbrand uint32_t desc) \ 5192bf3ee38SDavid Hildenbrand { \ 5202bf3ee38SDavid Hildenbrand int i, j; \ 5212bf3ee38SDavid Hildenbrand \ 5222bf3ee38SDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 5232bf3ee38SDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 5242bf3ee38SDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 5252bf3ee38SDavid Hildenbrand \ 5262bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 5272bf3ee38SDavid Hildenbrand } \ 5282bf3ee38SDavid Hildenbrand } 5292bf3ee38SDavid Hildenbrand DEF_VMO(8, 16) 5302bf3ee38SDavid Hildenbrand DEF_VMO(16, 32) 5312bf3ee38SDavid Hildenbrand DEF_VMO(32, 64) 5322bf3ee38SDavid Hildenbrand 5332bf3ee38SDavid Hildenbrand #define DEF_VMLO(BITS, TBITS) \ 5342bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \ 5352bf3ee38SDavid Hildenbrand uint32_t desc) \ 5362bf3ee38SDavid Hildenbrand { \ 5372bf3ee38SDavid Hildenbrand int i, j; \ 5382bf3ee38SDavid Hildenbrand \ 53949a7ce4eSDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 5402bf3ee38SDavid Hildenbrand const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 5412bf3ee38SDavid Hildenbrand const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 5422bf3ee38SDavid Hildenbrand \ 5432bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 5442bf3ee38SDavid Hildenbrand } \ 5452bf3ee38SDavid Hildenbrand } 5462bf3ee38SDavid Hildenbrand DEF_VMLO(8, 16) 5472bf3ee38SDavid Hildenbrand DEF_VMLO(16, 32) 5482bf3ee38SDavid Hildenbrand DEF_VMLO(32, 64) 549c3838aaaSDavid Hildenbrand 550c3838aaaSDavid Hildenbrand #define DEF_VPOPCT(BITS) \ 551c3838aaaSDavid Hildenbrand void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \ 552c3838aaaSDavid Hildenbrand { \ 553c3838aaaSDavid Hildenbrand int i; \ 554c3838aaaSDavid Hildenbrand \ 555c3838aaaSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 556c3838aaaSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 557c3838aaaSDavid Hildenbrand \ 558c3838aaaSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, ctpop32(a)); \ 559c3838aaaSDavid Hildenbrand } \ 560c3838aaaSDavid Hildenbrand } 561c3838aaaSDavid Hildenbrand DEF_VPOPCT(8) 562c3838aaaSDavid Hildenbrand DEF_VPOPCT(16) 56355236da2SDavid Hildenbrand 5645c4b0ab4SDavid Hildenbrand #define DEF_VERIM(BITS) \ 5655c4b0ab4SDavid Hildenbrand void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \ 5665c4b0ab4SDavid Hildenbrand uint32_t desc) \ 5675c4b0ab4SDavid Hildenbrand { \ 5685c4b0ab4SDavid Hildenbrand const uint8_t count = simd_data(desc); \ 5695c4b0ab4SDavid Hildenbrand int i; \ 5705c4b0ab4SDavid Hildenbrand \ 5715c4b0ab4SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 5725c4b0ab4SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \ 5735c4b0ab4SDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i); \ 5745c4b0ab4SDavid Hildenbrand const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \ 5755c4b0ab4SDavid Hildenbrand const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \ 5765c4b0ab4SDavid Hildenbrand \ 5775c4b0ab4SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, d); \ 5785c4b0ab4SDavid Hildenbrand } \ 5795c4b0ab4SDavid Hildenbrand } 5805c4b0ab4SDavid Hildenbrand DEF_VERIM(8) 5815c4b0ab4SDavid Hildenbrand DEF_VERIM(16) 582dea33fc3SDavid Hildenbrand 583dea33fc3SDavid Hildenbrand void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count, 584dea33fc3SDavid Hildenbrand uint32_t desc) 585dea33fc3SDavid Hildenbrand { 586dea33fc3SDavid Hildenbrand s390_vec_shl(v1, v2, count); 587dea33fc3SDavid Hildenbrand } 5885f724887SDavid Hildenbrand 589b7a50eb7SDavid Miller void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3, 590b7a50eb7SDavid Miller uint32_t desc) 591b7a50eb7SDavid Miller { 592b7a50eb7SDavid Miller S390Vector tmp; 593b7a50eb7SDavid Miller uint32_t sh, e0, e1 = 0; 594b7a50eb7SDavid Miller int i; 595b7a50eb7SDavid Miller 596b7a50eb7SDavid Miller for (i = 15; i >= 0; --i, e1 = e0) { 597b7a50eb7SDavid Miller e0 = s390_vec_read_element8(v2, i); 598b7a50eb7SDavid Miller sh = s390_vec_read_element8(v3, i) & 7; 599b7a50eb7SDavid Miller 600b7a50eb7SDavid Miller s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh)); 601b7a50eb7SDavid Miller } 602b7a50eb7SDavid Miller 603b7a50eb7SDavid Miller *(S390Vector *)v1 = tmp; 604b7a50eb7SDavid Miller } 605b7a50eb7SDavid Miller 6065f724887SDavid Hildenbrand void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count, 6075f724887SDavid Hildenbrand uint32_t desc) 6085f724887SDavid Hildenbrand { 6095f724887SDavid Hildenbrand s390_vec_sar(v1, v2, count); 6105f724887SDavid Hildenbrand } 6118112274fSDavid Hildenbrand 612b7a50eb7SDavid Miller void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3, 613b7a50eb7SDavid Miller uint32_t desc) 614b7a50eb7SDavid Miller { 615b7a50eb7SDavid Miller S390Vector tmp; 616b7a50eb7SDavid Miller uint32_t sh, e0, e1 = 0; 617b7a50eb7SDavid Miller int i = 0; 618b7a50eb7SDavid Miller 619b7a50eb7SDavid Miller /* Byte 0 is special only. */ 620b7a50eb7SDavid Miller e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i); 621b7a50eb7SDavid Miller sh = s390_vec_read_element8(v3, i) & 7; 622b7a50eb7SDavid Miller s390_vec_write_element8(&tmp, i, e0 >> sh); 623b7a50eb7SDavid Miller 624b7a50eb7SDavid Miller e1 = e0; 625b7a50eb7SDavid Miller for (i = 1; i < 16; ++i, e1 = e0) { 626b7a50eb7SDavid Miller e0 = s390_vec_read_element8(v2, i); 627b7a50eb7SDavid Miller sh = s390_vec_read_element8(v3, i) & 7; 628b7a50eb7SDavid Miller s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh); 629b7a50eb7SDavid Miller } 630b7a50eb7SDavid Miller 631b7a50eb7SDavid Miller *(S390Vector *)v1 = tmp; 632b7a50eb7SDavid Miller } 633b7a50eb7SDavid Miller 6348112274fSDavid Hildenbrand void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count, 6358112274fSDavid Hildenbrand uint32_t desc) 6368112274fSDavid Hildenbrand { 6378112274fSDavid Hildenbrand s390_vec_shr(v1, v2, count); 6388112274fSDavid Hildenbrand } 6391ee2d7baSDavid Hildenbrand 640b7a50eb7SDavid Miller void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3, 641b7a50eb7SDavid Miller uint32_t desc) 642b7a50eb7SDavid Miller { 643b7a50eb7SDavid Miller S390Vector tmp; 644b7a50eb7SDavid Miller uint32_t sh, e0, e1 = 0; 645b7a50eb7SDavid Miller 646b7a50eb7SDavid Miller for (int i = 0; i < 16; ++i, e1 = e0) { 647b7a50eb7SDavid Miller e0 = s390_vec_read_element8(v2, i); 648b7a50eb7SDavid Miller sh = s390_vec_read_element8(v3, i) & 7; 649b7a50eb7SDavid Miller 650b7a50eb7SDavid Miller s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh); 651b7a50eb7SDavid Miller } 652b7a50eb7SDavid Miller 653b7a50eb7SDavid Miller *(S390Vector *)v1 = tmp; 654b7a50eb7SDavid Miller } 655b7a50eb7SDavid Miller 6561ee2d7baSDavid Hildenbrand #define DEF_VSCBI(BITS) \ 6571ee2d7baSDavid Hildenbrand void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \ 6581ee2d7baSDavid Hildenbrand uint32_t desc) \ 6591ee2d7baSDavid Hildenbrand { \ 6601ee2d7baSDavid Hildenbrand int i; \ 6611ee2d7baSDavid Hildenbrand \ 6621ee2d7baSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 6631ee2d7baSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 6641ee2d7baSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 6651ee2d7baSDavid Hildenbrand \ 66623e79774SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a >= b); \ 6671ee2d7baSDavid Hildenbrand } \ 6681ee2d7baSDavid Hildenbrand } 6691ee2d7baSDavid Hildenbrand DEF_VSCBI(8) 6701ee2d7baSDavid Hildenbrand DEF_VSCBI(16) 671db156ebfSDavid Hildenbrand 672db156ebfSDavid Hildenbrand void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env, 673db156ebfSDavid Hildenbrand uint32_t desc) 674db156ebfSDavid Hildenbrand { 675db156ebfSDavid Hildenbrand S390Vector tmp; 676db156ebfSDavid Hildenbrand 677db156ebfSDavid Hildenbrand s390_vec_and(&tmp, v1, v2); 678db156ebfSDavid Hildenbrand if (s390_vec_is_zero(&tmp)) { 679db156ebfSDavid Hildenbrand /* Selected bits all zeros; or all mask bits zero */ 680db156ebfSDavid Hildenbrand env->cc_op = 0; 681db156ebfSDavid Hildenbrand } else if (s390_vec_equal(&tmp, v2)) { 682db156ebfSDavid Hildenbrand /* Selected bits all ones */ 683db156ebfSDavid Hildenbrand env->cc_op = 3; 684db156ebfSDavid Hildenbrand } else { 685db156ebfSDavid Hildenbrand /* Selected bits a mix of zeros and ones */ 686db156ebfSDavid Hildenbrand env->cc_op = 1; 687db156ebfSDavid Hildenbrand } 688db156ebfSDavid Hildenbrand } 689