1c1a81d4bSDavid Hildenbrand /* 2c1a81d4bSDavid Hildenbrand * QEMU TCG support -- s390x vector integer instruction support 3c1a81d4bSDavid Hildenbrand * 4c1a81d4bSDavid Hildenbrand * Copyright (C) 2019 Red Hat Inc 5c1a81d4bSDavid Hildenbrand * 6c1a81d4bSDavid Hildenbrand * Authors: 7c1a81d4bSDavid Hildenbrand * David Hildenbrand <david@redhat.com> 8c1a81d4bSDavid Hildenbrand * 9c1a81d4bSDavid Hildenbrand * This work is licensed under the terms of the GNU GPL, version 2 or later. 10c1a81d4bSDavid Hildenbrand * See the COPYING file in the top-level directory. 11c1a81d4bSDavid Hildenbrand */ 12c1a81d4bSDavid Hildenbrand #include "qemu/osdep.h" 13c1a81d4bSDavid Hildenbrand #include "cpu.h" 14c1a81d4bSDavid Hildenbrand #include "vec.h" 15c1a81d4bSDavid Hildenbrand #include "exec/helper-proto.h" 165c4b0ab4SDavid Hildenbrand #include "tcg/tcg-gvec-desc.h" 17*2d8bc681SRichard Henderson #include "crypto/clmul.h" 18c1a81d4bSDavid Hildenbrand 19697a45d6SDavid Hildenbrand static bool s390_vec_is_zero(const S390Vector *v) 20697a45d6SDavid Hildenbrand { 21697a45d6SDavid Hildenbrand return !v->doubleword[0] && !v->doubleword[1]; 22697a45d6SDavid Hildenbrand } 23697a45d6SDavid Hildenbrand 24697a45d6SDavid Hildenbrand static void s390_vec_xor(S390Vector *res, const S390Vector *a, 25697a45d6SDavid Hildenbrand const S390Vector *b) 26697a45d6SDavid Hildenbrand { 27697a45d6SDavid Hildenbrand res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0]; 28697a45d6SDavid Hildenbrand res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1]; 29697a45d6SDavid Hildenbrand } 30697a45d6SDavid Hildenbrand 31db156ebfSDavid Hildenbrand static void s390_vec_and(S390Vector *res, const S390Vector *a, 32db156ebfSDavid Hildenbrand const S390Vector *b) 33db156ebfSDavid Hildenbrand { 34db156ebfSDavid Hildenbrand res->doubleword[0] = a->doubleword[0] & b->doubleword[0]; 35db156ebfSDavid Hildenbrand res->doubleword[1] = a->doubleword[1] & b->doubleword[1]; 36db156ebfSDavid Hildenbrand } 37db156ebfSDavid Hildenbrand 38db156ebfSDavid Hildenbrand static bool s390_vec_equal(const S390Vector *a, const S390Vector *b) 39db156ebfSDavid Hildenbrand { 40db156ebfSDavid Hildenbrand return a->doubleword[0] == b->doubleword[0] && 41db156ebfSDavid Hildenbrand a->doubleword[1] == b->doubleword[1]; 42db156ebfSDavid Hildenbrand } 43db156ebfSDavid Hildenbrand 44697a45d6SDavid Hildenbrand static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count) 45697a45d6SDavid Hildenbrand { 46697a45d6SDavid Hildenbrand uint64_t tmp; 47697a45d6SDavid Hildenbrand 48697a45d6SDavid Hildenbrand g_assert(count < 128); 49697a45d6SDavid Hildenbrand if (count == 0) { 50697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[0]; 51697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[1]; 52697a45d6SDavid Hildenbrand } else if (count == 64) { 53697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[1]; 54697a45d6SDavid Hildenbrand d->doubleword[1] = 0; 55697a45d6SDavid Hildenbrand } else if (count < 64) { 56697a45d6SDavid Hildenbrand tmp = extract64(a->doubleword[1], 64 - count, count); 57697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[1] << count; 58697a45d6SDavid Hildenbrand d->doubleword[0] = (a->doubleword[0] << count) | tmp; 59697a45d6SDavid Hildenbrand } else { 60697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[1] << (count - 64); 61697a45d6SDavid Hildenbrand d->doubleword[1] = 0; 62697a45d6SDavid Hildenbrand } 63697a45d6SDavid Hildenbrand } 64697a45d6SDavid Hildenbrand 655f724887SDavid Hildenbrand static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count) 665f724887SDavid Hildenbrand { 675f724887SDavid Hildenbrand uint64_t tmp; 685f724887SDavid Hildenbrand 695f724887SDavid Hildenbrand if (count == 0) { 705f724887SDavid Hildenbrand d->doubleword[0] = a->doubleword[0]; 715f724887SDavid Hildenbrand d->doubleword[1] = a->doubleword[1]; 725f724887SDavid Hildenbrand } else if (count == 64) { 73b57b3368SDavid Hildenbrand tmp = (int64_t)a->doubleword[0] >> 63; 745f724887SDavid Hildenbrand d->doubleword[1] = a->doubleword[0]; 75b57b3368SDavid Hildenbrand d->doubleword[0] = tmp; 765f724887SDavid Hildenbrand } else if (count < 64) { 775f724887SDavid Hildenbrand tmp = a->doubleword[1] >> count; 785f724887SDavid Hildenbrand d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 795f724887SDavid Hildenbrand d->doubleword[0] = (int64_t)a->doubleword[0] >> count; 805f724887SDavid Hildenbrand } else { 81b57b3368SDavid Hildenbrand tmp = (int64_t)a->doubleword[0] >> 63; 825f724887SDavid Hildenbrand d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64); 83b57b3368SDavid Hildenbrand d->doubleword[0] = tmp; 845f724887SDavid Hildenbrand } 855f724887SDavid Hildenbrand } 865f724887SDavid Hildenbrand 87697a45d6SDavid Hildenbrand static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count) 88697a45d6SDavid Hildenbrand { 89697a45d6SDavid Hildenbrand uint64_t tmp; 90697a45d6SDavid Hildenbrand 91697a45d6SDavid Hildenbrand g_assert(count < 128); 92697a45d6SDavid Hildenbrand if (count == 0) { 93697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[0]; 94697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[1]; 95697a45d6SDavid Hildenbrand } else if (count == 64) { 96697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[0]; 97697a45d6SDavid Hildenbrand d->doubleword[0] = 0; 98697a45d6SDavid Hildenbrand } else if (count < 64) { 99697a45d6SDavid Hildenbrand tmp = a->doubleword[1] >> count; 100697a45d6SDavid Hildenbrand d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 101697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[0] >> count; 102697a45d6SDavid Hildenbrand } else { 103697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[0] >> (count - 64); 104697a45d6SDavid Hildenbrand d->doubleword[0] = 0; 105697a45d6SDavid Hildenbrand } 106697a45d6SDavid Hildenbrand } 107c1a81d4bSDavid Hildenbrand #define DEF_VAVG(BITS) \ 108c1a81d4bSDavid Hildenbrand void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \ 109c1a81d4bSDavid Hildenbrand uint32_t desc) \ 110c1a81d4bSDavid Hildenbrand { \ 111c1a81d4bSDavid Hildenbrand int i; \ 112c1a81d4bSDavid Hildenbrand \ 113c1a81d4bSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 114c1a81d4bSDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 115c1a81d4bSDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 116c1a81d4bSDavid Hildenbrand \ 117c1a81d4bSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 118c1a81d4bSDavid Hildenbrand } \ 119c1a81d4bSDavid Hildenbrand } 120c1a81d4bSDavid Hildenbrand DEF_VAVG(8) 121c1a81d4bSDavid Hildenbrand DEF_VAVG(16) 122801aa78bSDavid Hildenbrand 123801aa78bSDavid Hildenbrand #define DEF_VAVGL(BITS) \ 124801aa78bSDavid Hildenbrand void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \ 125801aa78bSDavid Hildenbrand uint32_t desc) \ 126801aa78bSDavid Hildenbrand { \ 127801aa78bSDavid Hildenbrand int i; \ 128801aa78bSDavid Hildenbrand \ 129801aa78bSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 130801aa78bSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 131801aa78bSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 132801aa78bSDavid Hildenbrand \ 133801aa78bSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 134801aa78bSDavid Hildenbrand } \ 135801aa78bSDavid Hildenbrand } 136801aa78bSDavid Hildenbrand DEF_VAVGL(8) 137801aa78bSDavid Hildenbrand DEF_VAVGL(16) 13828863f1dSDavid Hildenbrand 13928863f1dSDavid Hildenbrand #define DEF_VCLZ(BITS) \ 14028863f1dSDavid Hildenbrand void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \ 14128863f1dSDavid Hildenbrand { \ 14228863f1dSDavid Hildenbrand int i; \ 14328863f1dSDavid Hildenbrand \ 14428863f1dSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 14528863f1dSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 14628863f1dSDavid Hildenbrand \ 14728863f1dSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \ 14828863f1dSDavid Hildenbrand } \ 14928863f1dSDavid Hildenbrand } 15028863f1dSDavid Hildenbrand DEF_VCLZ(8) 15128863f1dSDavid Hildenbrand DEF_VCLZ(16) 152449a8ac2SDavid Hildenbrand 153449a8ac2SDavid Hildenbrand #define DEF_VCTZ(BITS) \ 154449a8ac2SDavid Hildenbrand void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \ 155449a8ac2SDavid Hildenbrand { \ 156449a8ac2SDavid Hildenbrand int i; \ 157449a8ac2SDavid Hildenbrand \ 158449a8ac2SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 159449a8ac2SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 160449a8ac2SDavid Hildenbrand \ 161449a8ac2SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \ 162449a8ac2SDavid Hildenbrand } \ 163449a8ac2SDavid Hildenbrand } 164449a8ac2SDavid Hildenbrand DEF_VCTZ(8) 165449a8ac2SDavid Hildenbrand DEF_VCTZ(16) 166697a45d6SDavid Hildenbrand 167697a45d6SDavid Hildenbrand /* like binary multiplication, but XOR instead of addition */ 168697a45d6SDavid Hildenbrand #define DEF_GALOIS_MULTIPLY(BITS, TBITS) \ 169697a45d6SDavid Hildenbrand static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \ 170697a45d6SDavid Hildenbrand uint##TBITS##_t b) \ 171697a45d6SDavid Hildenbrand { \ 172697a45d6SDavid Hildenbrand uint##TBITS##_t res = 0; \ 173697a45d6SDavid Hildenbrand \ 174697a45d6SDavid Hildenbrand while (b) { \ 175697a45d6SDavid Hildenbrand if (b & 0x1) { \ 176697a45d6SDavid Hildenbrand res = res ^ a; \ 177697a45d6SDavid Hildenbrand } \ 178697a45d6SDavid Hildenbrand a = a << 1; \ 179697a45d6SDavid Hildenbrand b = b >> 1; \ 180697a45d6SDavid Hildenbrand } \ 181697a45d6SDavid Hildenbrand return res; \ 182697a45d6SDavid Hildenbrand } 183697a45d6SDavid Hildenbrand DEF_GALOIS_MULTIPLY(16, 32) 184697a45d6SDavid Hildenbrand DEF_GALOIS_MULTIPLY(32, 64) 185697a45d6SDavid Hildenbrand 186697a45d6SDavid Hildenbrand static S390Vector galois_multiply64(uint64_t a, uint64_t b) 187697a45d6SDavid Hildenbrand { 188697a45d6SDavid Hildenbrand S390Vector res = {}; 189697a45d6SDavid Hildenbrand S390Vector va = { 190697a45d6SDavid Hildenbrand .doubleword[1] = a, 191697a45d6SDavid Hildenbrand }; 192697a45d6SDavid Hildenbrand S390Vector vb = { 193697a45d6SDavid Hildenbrand .doubleword[1] = b, 194697a45d6SDavid Hildenbrand }; 195697a45d6SDavid Hildenbrand 196697a45d6SDavid Hildenbrand while (!s390_vec_is_zero(&vb)) { 197697a45d6SDavid Hildenbrand if (vb.doubleword[1] & 0x1) { 198697a45d6SDavid Hildenbrand s390_vec_xor(&res, &res, &va); 199697a45d6SDavid Hildenbrand } 200697a45d6SDavid Hildenbrand s390_vec_shl(&va, &va, 1); 201697a45d6SDavid Hildenbrand s390_vec_shr(&vb, &vb, 1); 202697a45d6SDavid Hildenbrand } 203697a45d6SDavid Hildenbrand return res; 204697a45d6SDavid Hildenbrand } 205697a45d6SDavid Hildenbrand 206*2d8bc681SRichard Henderson /* 207*2d8bc681SRichard Henderson * There is no carry across the two doublewords, so their order does 208*2d8bc681SRichard Henderson * not matter. Nor is there partial overlap between registers. 209*2d8bc681SRichard Henderson */ 210*2d8bc681SRichard Henderson static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a) 211*2d8bc681SRichard Henderson { 212*2d8bc681SRichard Henderson return clmul_8x4_even(n, m) ^ clmul_8x4_odd(n, m) ^ a; 213*2d8bc681SRichard Henderson } 214*2d8bc681SRichard Henderson 215*2d8bc681SRichard Henderson void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d) 216*2d8bc681SRichard Henderson { 217*2d8bc681SRichard Henderson uint64_t *q1 = v1; 218*2d8bc681SRichard Henderson const uint64_t *q2 = v2, *q3 = v3; 219*2d8bc681SRichard Henderson 220*2d8bc681SRichard Henderson q1[0] = do_gfma8(q2[0], q3[0], 0); 221*2d8bc681SRichard Henderson q1[1] = do_gfma8(q2[1], q3[1], 0); 222*2d8bc681SRichard Henderson } 223*2d8bc681SRichard Henderson 224*2d8bc681SRichard Henderson void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3, 225*2d8bc681SRichard Henderson const void *v4, uint32_t desc) 226*2d8bc681SRichard Henderson { 227*2d8bc681SRichard Henderson uint64_t *q1 = v1; 228*2d8bc681SRichard Henderson const uint64_t *q2 = v2, *q3 = v3, *q4 = v4; 229*2d8bc681SRichard Henderson 230*2d8bc681SRichard Henderson q1[0] = do_gfma8(q2[0], q3[0], q4[0]); 231*2d8bc681SRichard Henderson q1[1] = do_gfma8(q2[1], q3[1], q4[1]); 232*2d8bc681SRichard Henderson } 233*2d8bc681SRichard Henderson 234697a45d6SDavid Hildenbrand #define DEF_VGFM(BITS, TBITS) \ 235697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \ 236697a45d6SDavid Hildenbrand uint32_t desc) \ 237697a45d6SDavid Hildenbrand { \ 238697a45d6SDavid Hildenbrand int i; \ 239697a45d6SDavid Hildenbrand \ 240697a45d6SDavid Hildenbrand for (i = 0; i < (128 / TBITS); i++) { \ 241697a45d6SDavid Hildenbrand uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 242697a45d6SDavid Hildenbrand uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 243697a45d6SDavid Hildenbrand uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 244697a45d6SDavid Hildenbrand \ 245697a45d6SDavid Hildenbrand a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 246697a45d6SDavid Hildenbrand b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 247697a45d6SDavid Hildenbrand d = d ^ galois_multiply32(a, b); \ 248697a45d6SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, d); \ 249697a45d6SDavid Hildenbrand } \ 250697a45d6SDavid Hildenbrand } 251697a45d6SDavid Hildenbrand DEF_VGFM(16, 32) 252697a45d6SDavid Hildenbrand DEF_VGFM(32, 64) 253697a45d6SDavid Hildenbrand 254697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3, 255697a45d6SDavid Hildenbrand uint32_t desc) 256697a45d6SDavid Hildenbrand { 257697a45d6SDavid Hildenbrand S390Vector tmp1, tmp2; 258697a45d6SDavid Hildenbrand uint64_t a, b; 259697a45d6SDavid Hildenbrand 260697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 0); 261697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 0); 262697a45d6SDavid Hildenbrand tmp1 = galois_multiply64(a, b); 263697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 1); 264697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 1); 265697a45d6SDavid Hildenbrand tmp2 = galois_multiply64(a, b); 266697a45d6SDavid Hildenbrand s390_vec_xor(v1, &tmp1, &tmp2); 267697a45d6SDavid Hildenbrand } 268697a45d6SDavid Hildenbrand 269697a45d6SDavid Hildenbrand #define DEF_VGFMA(BITS, TBITS) \ 270697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \ 271697a45d6SDavid Hildenbrand const void *v4, uint32_t desc) \ 272697a45d6SDavid Hildenbrand { \ 273697a45d6SDavid Hildenbrand int i; \ 274697a45d6SDavid Hildenbrand \ 275697a45d6SDavid Hildenbrand for (i = 0; i < (128 / TBITS); i++) { \ 276697a45d6SDavid Hildenbrand uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 277697a45d6SDavid Hildenbrand uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 278697a45d6SDavid Hildenbrand uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 279697a45d6SDavid Hildenbrand \ 280697a45d6SDavid Hildenbrand a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 281697a45d6SDavid Hildenbrand b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 282697a45d6SDavid Hildenbrand d = d ^ galois_multiply32(a, b); \ 283697a45d6SDavid Hildenbrand d = d ^ s390_vec_read_element##TBITS(v4, i); \ 284697a45d6SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, d); \ 285697a45d6SDavid Hildenbrand } \ 286697a45d6SDavid Hildenbrand } 287697a45d6SDavid Hildenbrand DEF_VGFMA(16, 32) 288697a45d6SDavid Hildenbrand DEF_VGFMA(32, 64) 289697a45d6SDavid Hildenbrand 290697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3, 291697a45d6SDavid Hildenbrand const void *v4, uint32_t desc) 292697a45d6SDavid Hildenbrand { 293697a45d6SDavid Hildenbrand S390Vector tmp1, tmp2; 294697a45d6SDavid Hildenbrand uint64_t a, b; 295697a45d6SDavid Hildenbrand 296697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 0); 297697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 0); 298697a45d6SDavid Hildenbrand tmp1 = galois_multiply64(a, b); 299697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 1); 300697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 1); 301697a45d6SDavid Hildenbrand tmp2 = galois_multiply64(a, b); 302697a45d6SDavid Hildenbrand s390_vec_xor(&tmp1, &tmp1, &tmp2); 303697a45d6SDavid Hildenbrand s390_vec_xor(v1, &tmp1, v4); 304697a45d6SDavid Hildenbrand } 3051b430aecSDavid Hildenbrand 3061b430aecSDavid Hildenbrand #define DEF_VMAL(BITS) \ 3071b430aecSDavid Hildenbrand void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \ 3081b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3091b430aecSDavid Hildenbrand { \ 3101b430aecSDavid Hildenbrand int i; \ 3111b430aecSDavid Hildenbrand \ 3121b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 3131b430aecSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 3141b430aecSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 3151b430aecSDavid Hildenbrand const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 3161b430aecSDavid Hildenbrand \ 3171b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a * b + c); \ 3181b430aecSDavid Hildenbrand } \ 3191b430aecSDavid Hildenbrand } 3201b430aecSDavid Hildenbrand DEF_VMAL(8) 3211b430aecSDavid Hildenbrand DEF_VMAL(16) 3221b430aecSDavid Hildenbrand 3231b430aecSDavid Hildenbrand #define DEF_VMAH(BITS) \ 3241b430aecSDavid Hildenbrand void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \ 3251b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3261b430aecSDavid Hildenbrand { \ 3271b430aecSDavid Hildenbrand int i; \ 3281b430aecSDavid Hildenbrand \ 3291b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 3301b430aecSDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 3311b430aecSDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 3321b430aecSDavid Hildenbrand const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \ 3331b430aecSDavid Hildenbrand \ 3341b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 3351b430aecSDavid Hildenbrand } \ 3361b430aecSDavid Hildenbrand } 3371b430aecSDavid Hildenbrand DEF_VMAH(8) 3381b430aecSDavid Hildenbrand DEF_VMAH(16) 3391b430aecSDavid Hildenbrand 3401b430aecSDavid Hildenbrand #define DEF_VMALH(BITS) \ 3411b430aecSDavid Hildenbrand void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \ 3421b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3431b430aecSDavid Hildenbrand { \ 3441b430aecSDavid Hildenbrand int i; \ 3451b430aecSDavid Hildenbrand \ 3461b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 3471b430aecSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 3481b430aecSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 3491b430aecSDavid Hildenbrand const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 3501b430aecSDavid Hildenbrand \ 3511b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 3521b430aecSDavid Hildenbrand } \ 3531b430aecSDavid Hildenbrand } 3541b430aecSDavid Hildenbrand DEF_VMALH(8) 3551b430aecSDavid Hildenbrand DEF_VMALH(16) 3561b430aecSDavid Hildenbrand 3571b430aecSDavid Hildenbrand #define DEF_VMAE(BITS, TBITS) \ 3581b430aecSDavid Hildenbrand void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \ 3591b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3601b430aecSDavid Hildenbrand { \ 3611b430aecSDavid Hildenbrand int i, j; \ 3621b430aecSDavid Hildenbrand \ 3631b430aecSDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 3641b430aecSDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 3651b430aecSDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 3668b952519SDavid Hildenbrand int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 3671b430aecSDavid Hildenbrand \ 3681b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 3691b430aecSDavid Hildenbrand } \ 3701b430aecSDavid Hildenbrand } 3711b430aecSDavid Hildenbrand DEF_VMAE(8, 16) 3721b430aecSDavid Hildenbrand DEF_VMAE(16, 32) 3731b430aecSDavid Hildenbrand DEF_VMAE(32, 64) 3741b430aecSDavid Hildenbrand 3751b430aecSDavid Hildenbrand #define DEF_VMALE(BITS, TBITS) \ 3761b430aecSDavid Hildenbrand void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \ 3771b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3781b430aecSDavid Hildenbrand { \ 3791b430aecSDavid Hildenbrand int i, j; \ 3801b430aecSDavid Hildenbrand \ 3811b430aecSDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 3821b430aecSDavid Hildenbrand uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 3831b430aecSDavid Hildenbrand uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 3848b952519SDavid Hildenbrand uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 3851b430aecSDavid Hildenbrand \ 3861b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 3871b430aecSDavid Hildenbrand } \ 3881b430aecSDavid Hildenbrand } 3891b430aecSDavid Hildenbrand DEF_VMALE(8, 16) 3901b430aecSDavid Hildenbrand DEF_VMALE(16, 32) 3911b430aecSDavid Hildenbrand DEF_VMALE(32, 64) 3921b430aecSDavid Hildenbrand 3931b430aecSDavid Hildenbrand #define DEF_VMAO(BITS, TBITS) \ 3941b430aecSDavid Hildenbrand void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \ 3951b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3961b430aecSDavid Hildenbrand { \ 3971b430aecSDavid Hildenbrand int i, j; \ 3981b430aecSDavid Hildenbrand \ 3991b430aecSDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 4001b430aecSDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 4011b430aecSDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 4028b952519SDavid Hildenbrand int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 4031b430aecSDavid Hildenbrand \ 4041b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 4051b430aecSDavid Hildenbrand } \ 4061b430aecSDavid Hildenbrand } 4071b430aecSDavid Hildenbrand DEF_VMAO(8, 16) 4081b430aecSDavid Hildenbrand DEF_VMAO(16, 32) 4091b430aecSDavid Hildenbrand DEF_VMAO(32, 64) 4101b430aecSDavid Hildenbrand 4111b430aecSDavid Hildenbrand #define DEF_VMALO(BITS, TBITS) \ 4121b430aecSDavid Hildenbrand void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \ 4131b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 4141b430aecSDavid Hildenbrand { \ 4151b430aecSDavid Hildenbrand int i, j; \ 4161b430aecSDavid Hildenbrand \ 4171b430aecSDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 4181b430aecSDavid Hildenbrand uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 4191b430aecSDavid Hildenbrand uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 4208b952519SDavid Hildenbrand uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 4211b430aecSDavid Hildenbrand \ 4221b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 4231b430aecSDavid Hildenbrand } \ 4241b430aecSDavid Hildenbrand } 4251b430aecSDavid Hildenbrand DEF_VMALO(8, 16) 4261b430aecSDavid Hildenbrand DEF_VMALO(16, 32) 4271b430aecSDavid Hildenbrand DEF_VMALO(32, 64) 4282bf3ee38SDavid Hildenbrand 4292bf3ee38SDavid Hildenbrand #define DEF_VMH(BITS) \ 4302bf3ee38SDavid Hildenbrand void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \ 4312bf3ee38SDavid Hildenbrand uint32_t desc) \ 4322bf3ee38SDavid Hildenbrand { \ 4332bf3ee38SDavid Hildenbrand int i; \ 4342bf3ee38SDavid Hildenbrand \ 4352bf3ee38SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 4362bf3ee38SDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 4372bf3ee38SDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 4382bf3ee38SDavid Hildenbrand \ 4392bf3ee38SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 4402bf3ee38SDavid Hildenbrand } \ 4412bf3ee38SDavid Hildenbrand } 4422bf3ee38SDavid Hildenbrand DEF_VMH(8) 4432bf3ee38SDavid Hildenbrand DEF_VMH(16) 4442bf3ee38SDavid Hildenbrand 4452bf3ee38SDavid Hildenbrand #define DEF_VMLH(BITS) \ 4462bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \ 4472bf3ee38SDavid Hildenbrand uint32_t desc) \ 4482bf3ee38SDavid Hildenbrand { \ 4492bf3ee38SDavid Hildenbrand int i; \ 4502bf3ee38SDavid Hildenbrand \ 4512bf3ee38SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 4522bf3ee38SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 4532bf3ee38SDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 4542bf3ee38SDavid Hildenbrand \ 4552bf3ee38SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 4562bf3ee38SDavid Hildenbrand } \ 4572bf3ee38SDavid Hildenbrand } 4582bf3ee38SDavid Hildenbrand DEF_VMLH(8) 4592bf3ee38SDavid Hildenbrand DEF_VMLH(16) 4602bf3ee38SDavid Hildenbrand 4612bf3ee38SDavid Hildenbrand #define DEF_VME(BITS, TBITS) \ 4622bf3ee38SDavid Hildenbrand void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \ 4632bf3ee38SDavid Hildenbrand uint32_t desc) \ 4642bf3ee38SDavid Hildenbrand { \ 4652bf3ee38SDavid Hildenbrand int i, j; \ 4662bf3ee38SDavid Hildenbrand \ 4672bf3ee38SDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 4682bf3ee38SDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 4692bf3ee38SDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 4702bf3ee38SDavid Hildenbrand \ 4712bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 4722bf3ee38SDavid Hildenbrand } \ 4732bf3ee38SDavid Hildenbrand } 4742bf3ee38SDavid Hildenbrand DEF_VME(8, 16) 4752bf3ee38SDavid Hildenbrand DEF_VME(16, 32) 4762bf3ee38SDavid Hildenbrand DEF_VME(32, 64) 4772bf3ee38SDavid Hildenbrand 4782bf3ee38SDavid Hildenbrand #define DEF_VMLE(BITS, TBITS) \ 4792bf3ee38SDavid Hildenbrand void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \ 4802bf3ee38SDavid Hildenbrand uint32_t desc) \ 4812bf3ee38SDavid Hildenbrand { \ 4822bf3ee38SDavid Hildenbrand int i, j; \ 4832bf3ee38SDavid Hildenbrand \ 4842bf3ee38SDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 4852bf3ee38SDavid Hildenbrand const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 4862bf3ee38SDavid Hildenbrand const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 4872bf3ee38SDavid Hildenbrand \ 4882bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 4892bf3ee38SDavid Hildenbrand } \ 4902bf3ee38SDavid Hildenbrand } 4912bf3ee38SDavid Hildenbrand DEF_VMLE(8, 16) 4922bf3ee38SDavid Hildenbrand DEF_VMLE(16, 32) 4932bf3ee38SDavid Hildenbrand DEF_VMLE(32, 64) 4942bf3ee38SDavid Hildenbrand 4952bf3ee38SDavid Hildenbrand #define DEF_VMO(BITS, TBITS) \ 4962bf3ee38SDavid Hildenbrand void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \ 4972bf3ee38SDavid Hildenbrand uint32_t desc) \ 4982bf3ee38SDavid Hildenbrand { \ 4992bf3ee38SDavid Hildenbrand int i, j; \ 5002bf3ee38SDavid Hildenbrand \ 5012bf3ee38SDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 5022bf3ee38SDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 5032bf3ee38SDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 5042bf3ee38SDavid Hildenbrand \ 5052bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 5062bf3ee38SDavid Hildenbrand } \ 5072bf3ee38SDavid Hildenbrand } 5082bf3ee38SDavid Hildenbrand DEF_VMO(8, 16) 5092bf3ee38SDavid Hildenbrand DEF_VMO(16, 32) 5102bf3ee38SDavid Hildenbrand DEF_VMO(32, 64) 5112bf3ee38SDavid Hildenbrand 5122bf3ee38SDavid Hildenbrand #define DEF_VMLO(BITS, TBITS) \ 5132bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \ 5142bf3ee38SDavid Hildenbrand uint32_t desc) \ 5152bf3ee38SDavid Hildenbrand { \ 5162bf3ee38SDavid Hildenbrand int i, j; \ 5172bf3ee38SDavid Hildenbrand \ 51849a7ce4eSDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 5192bf3ee38SDavid Hildenbrand const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 5202bf3ee38SDavid Hildenbrand const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 5212bf3ee38SDavid Hildenbrand \ 5222bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 5232bf3ee38SDavid Hildenbrand } \ 5242bf3ee38SDavid Hildenbrand } 5252bf3ee38SDavid Hildenbrand DEF_VMLO(8, 16) 5262bf3ee38SDavid Hildenbrand DEF_VMLO(16, 32) 5272bf3ee38SDavid Hildenbrand DEF_VMLO(32, 64) 528c3838aaaSDavid Hildenbrand 529c3838aaaSDavid Hildenbrand #define DEF_VPOPCT(BITS) \ 530c3838aaaSDavid Hildenbrand void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \ 531c3838aaaSDavid Hildenbrand { \ 532c3838aaaSDavid Hildenbrand int i; \ 533c3838aaaSDavid Hildenbrand \ 534c3838aaaSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 535c3838aaaSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 536c3838aaaSDavid Hildenbrand \ 537c3838aaaSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, ctpop32(a)); \ 538c3838aaaSDavid Hildenbrand } \ 539c3838aaaSDavid Hildenbrand } 540c3838aaaSDavid Hildenbrand DEF_VPOPCT(8) 541c3838aaaSDavid Hildenbrand DEF_VPOPCT(16) 54255236da2SDavid Hildenbrand 5435c4b0ab4SDavid Hildenbrand #define DEF_VERIM(BITS) \ 5445c4b0ab4SDavid Hildenbrand void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \ 5455c4b0ab4SDavid Hildenbrand uint32_t desc) \ 5465c4b0ab4SDavid Hildenbrand { \ 5475c4b0ab4SDavid Hildenbrand const uint8_t count = simd_data(desc); \ 5485c4b0ab4SDavid Hildenbrand int i; \ 5495c4b0ab4SDavid Hildenbrand \ 5505c4b0ab4SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 5515c4b0ab4SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \ 5525c4b0ab4SDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i); \ 5535c4b0ab4SDavid Hildenbrand const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \ 5545c4b0ab4SDavid Hildenbrand const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \ 5555c4b0ab4SDavid Hildenbrand \ 5565c4b0ab4SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, d); \ 5575c4b0ab4SDavid Hildenbrand } \ 5585c4b0ab4SDavid Hildenbrand } 5595c4b0ab4SDavid Hildenbrand DEF_VERIM(8) 5605c4b0ab4SDavid Hildenbrand DEF_VERIM(16) 561dea33fc3SDavid Hildenbrand 562dea33fc3SDavid Hildenbrand void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count, 563dea33fc3SDavid Hildenbrand uint32_t desc) 564dea33fc3SDavid Hildenbrand { 565dea33fc3SDavid Hildenbrand s390_vec_shl(v1, v2, count); 566dea33fc3SDavid Hildenbrand } 5675f724887SDavid Hildenbrand 568b7a50eb7SDavid Miller void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3, 569b7a50eb7SDavid Miller uint32_t desc) 570b7a50eb7SDavid Miller { 571b7a50eb7SDavid Miller S390Vector tmp; 572b7a50eb7SDavid Miller uint32_t sh, e0, e1 = 0; 573b7a50eb7SDavid Miller int i; 574b7a50eb7SDavid Miller 575b7a50eb7SDavid Miller for (i = 15; i >= 0; --i, e1 = e0) { 576b7a50eb7SDavid Miller e0 = s390_vec_read_element8(v2, i); 577b7a50eb7SDavid Miller sh = s390_vec_read_element8(v3, i) & 7; 578b7a50eb7SDavid Miller 579b7a50eb7SDavid Miller s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh)); 580b7a50eb7SDavid Miller } 581b7a50eb7SDavid Miller 582b7a50eb7SDavid Miller *(S390Vector *)v1 = tmp; 583b7a50eb7SDavid Miller } 584b7a50eb7SDavid Miller 5855f724887SDavid Hildenbrand void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count, 5865f724887SDavid Hildenbrand uint32_t desc) 5875f724887SDavid Hildenbrand { 5885f724887SDavid Hildenbrand s390_vec_sar(v1, v2, count); 5895f724887SDavid Hildenbrand } 5908112274fSDavid Hildenbrand 591b7a50eb7SDavid Miller void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3, 592b7a50eb7SDavid Miller uint32_t desc) 593b7a50eb7SDavid Miller { 594b7a50eb7SDavid Miller S390Vector tmp; 595b7a50eb7SDavid Miller uint32_t sh, e0, e1 = 0; 596b7a50eb7SDavid Miller int i = 0; 597b7a50eb7SDavid Miller 598b7a50eb7SDavid Miller /* Byte 0 is special only. */ 599b7a50eb7SDavid Miller e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i); 600b7a50eb7SDavid Miller sh = s390_vec_read_element8(v3, i) & 7; 601b7a50eb7SDavid Miller s390_vec_write_element8(&tmp, i, e0 >> sh); 602b7a50eb7SDavid Miller 603b7a50eb7SDavid Miller e1 = e0; 604b7a50eb7SDavid Miller for (i = 1; i < 16; ++i, e1 = e0) { 605b7a50eb7SDavid Miller e0 = s390_vec_read_element8(v2, i); 606b7a50eb7SDavid Miller sh = s390_vec_read_element8(v3, i) & 7; 607b7a50eb7SDavid Miller s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh); 608b7a50eb7SDavid Miller } 609b7a50eb7SDavid Miller 610b7a50eb7SDavid Miller *(S390Vector *)v1 = tmp; 611b7a50eb7SDavid Miller } 612b7a50eb7SDavid Miller 6138112274fSDavid Hildenbrand void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count, 6148112274fSDavid Hildenbrand uint32_t desc) 6158112274fSDavid Hildenbrand { 6168112274fSDavid Hildenbrand s390_vec_shr(v1, v2, count); 6178112274fSDavid Hildenbrand } 6181ee2d7baSDavid Hildenbrand 619b7a50eb7SDavid Miller void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3, 620b7a50eb7SDavid Miller uint32_t desc) 621b7a50eb7SDavid Miller { 622b7a50eb7SDavid Miller S390Vector tmp; 623b7a50eb7SDavid Miller uint32_t sh, e0, e1 = 0; 624b7a50eb7SDavid Miller 625b7a50eb7SDavid Miller for (int i = 0; i < 16; ++i, e1 = e0) { 626b7a50eb7SDavid Miller e0 = s390_vec_read_element8(v2, i); 627b7a50eb7SDavid Miller sh = s390_vec_read_element8(v3, i) & 7; 628b7a50eb7SDavid Miller 629b7a50eb7SDavid Miller s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh); 630b7a50eb7SDavid Miller } 631b7a50eb7SDavid Miller 632b7a50eb7SDavid Miller *(S390Vector *)v1 = tmp; 633b7a50eb7SDavid Miller } 634b7a50eb7SDavid Miller 6351ee2d7baSDavid Hildenbrand #define DEF_VSCBI(BITS) \ 6361ee2d7baSDavid Hildenbrand void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \ 6371ee2d7baSDavid Hildenbrand uint32_t desc) \ 6381ee2d7baSDavid Hildenbrand { \ 6391ee2d7baSDavid Hildenbrand int i; \ 6401ee2d7baSDavid Hildenbrand \ 6411ee2d7baSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 6421ee2d7baSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 6431ee2d7baSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 6441ee2d7baSDavid Hildenbrand \ 64523e79774SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a >= b); \ 6461ee2d7baSDavid Hildenbrand } \ 6471ee2d7baSDavid Hildenbrand } 6481ee2d7baSDavid Hildenbrand DEF_VSCBI(8) 6491ee2d7baSDavid Hildenbrand DEF_VSCBI(16) 650db156ebfSDavid Hildenbrand 651db156ebfSDavid Hildenbrand void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env, 652db156ebfSDavid Hildenbrand uint32_t desc) 653db156ebfSDavid Hildenbrand { 654db156ebfSDavid Hildenbrand S390Vector tmp; 655db156ebfSDavid Hildenbrand 656db156ebfSDavid Hildenbrand s390_vec_and(&tmp, v1, v2); 657db156ebfSDavid Hildenbrand if (s390_vec_is_zero(&tmp)) { 658db156ebfSDavid Hildenbrand /* Selected bits all zeros; or all mask bits zero */ 659db156ebfSDavid Hildenbrand env->cc_op = 0; 660db156ebfSDavid Hildenbrand } else if (s390_vec_equal(&tmp, v2)) { 661db156ebfSDavid Hildenbrand /* Selected bits all ones */ 662db156ebfSDavid Hildenbrand env->cc_op = 3; 663db156ebfSDavid Hildenbrand } else { 664db156ebfSDavid Hildenbrand /* Selected bits a mix of zeros and ones */ 665db156ebfSDavid Hildenbrand env->cc_op = 1; 666db156ebfSDavid Hildenbrand } 667db156ebfSDavid Hildenbrand } 668