1c1a81d4bSDavid Hildenbrand /* 2c1a81d4bSDavid Hildenbrand * QEMU TCG support -- s390x vector integer instruction support 3c1a81d4bSDavid Hildenbrand * 4c1a81d4bSDavid Hildenbrand * Copyright (C) 2019 Red Hat Inc 5c1a81d4bSDavid Hildenbrand * 6c1a81d4bSDavid Hildenbrand * Authors: 7c1a81d4bSDavid Hildenbrand * David Hildenbrand <david@redhat.com> 8c1a81d4bSDavid Hildenbrand * 9c1a81d4bSDavid Hildenbrand * This work is licensed under the terms of the GNU GPL, version 2 or later. 10c1a81d4bSDavid Hildenbrand * See the COPYING file in the top-level directory. 11c1a81d4bSDavid Hildenbrand */ 12c1a81d4bSDavid Hildenbrand #include "qemu/osdep.h" 13c1a81d4bSDavid Hildenbrand #include "qemu-common.h" 14c1a81d4bSDavid Hildenbrand #include "cpu.h" 15c1a81d4bSDavid Hildenbrand #include "vec.h" 16c1a81d4bSDavid Hildenbrand #include "exec/helper-proto.h" 175c4b0ab4SDavid Hildenbrand #include "tcg/tcg-gvec-desc.h" 18c1a81d4bSDavid Hildenbrand 19697a45d6SDavid Hildenbrand static bool s390_vec_is_zero(const S390Vector *v) 20697a45d6SDavid Hildenbrand { 21697a45d6SDavid Hildenbrand return !v->doubleword[0] && !v->doubleword[1]; 22697a45d6SDavid Hildenbrand } 23697a45d6SDavid Hildenbrand 24697a45d6SDavid Hildenbrand static void s390_vec_xor(S390Vector *res, const S390Vector *a, 25697a45d6SDavid Hildenbrand const S390Vector *b) 26697a45d6SDavid Hildenbrand { 27697a45d6SDavid Hildenbrand res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0]; 28697a45d6SDavid Hildenbrand res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1]; 29697a45d6SDavid Hildenbrand } 30697a45d6SDavid Hildenbrand 31697a45d6SDavid Hildenbrand static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count) 32697a45d6SDavid Hildenbrand { 33697a45d6SDavid Hildenbrand uint64_t tmp; 34697a45d6SDavid Hildenbrand 35697a45d6SDavid Hildenbrand g_assert(count < 128); 36697a45d6SDavid Hildenbrand if (count == 0) { 37697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[0]; 38697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[1]; 39697a45d6SDavid Hildenbrand } else if (count == 64) { 40697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[1]; 41697a45d6SDavid Hildenbrand d->doubleword[1] = 0; 42697a45d6SDavid Hildenbrand } else if (count < 64) { 43697a45d6SDavid Hildenbrand tmp = extract64(a->doubleword[1], 64 - count, count); 44697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[1] << count; 45697a45d6SDavid Hildenbrand d->doubleword[0] = (a->doubleword[0] << count) | tmp; 46697a45d6SDavid Hildenbrand } else { 47697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[1] << (count - 64); 48697a45d6SDavid Hildenbrand d->doubleword[1] = 0; 49697a45d6SDavid Hildenbrand } 50697a45d6SDavid Hildenbrand } 51697a45d6SDavid Hildenbrand 525f724887SDavid Hildenbrand static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count) 535f724887SDavid Hildenbrand { 545f724887SDavid Hildenbrand uint64_t tmp; 555f724887SDavid Hildenbrand 565f724887SDavid Hildenbrand if (count == 0) { 575f724887SDavid Hildenbrand d->doubleword[0] = a->doubleword[0]; 585f724887SDavid Hildenbrand d->doubleword[1] = a->doubleword[1]; 595f724887SDavid Hildenbrand } else if (count == 64) { 605f724887SDavid Hildenbrand d->doubleword[1] = a->doubleword[0]; 615f724887SDavid Hildenbrand d->doubleword[0] = 0; 625f724887SDavid Hildenbrand } else if (count < 64) { 635f724887SDavid Hildenbrand tmp = a->doubleword[1] >> count; 645f724887SDavid Hildenbrand d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 655f724887SDavid Hildenbrand d->doubleword[0] = (int64_t)a->doubleword[0] >> count; 665f724887SDavid Hildenbrand } else { 675f724887SDavid Hildenbrand d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64); 685f724887SDavid Hildenbrand d->doubleword[0] = 0; 695f724887SDavid Hildenbrand } 705f724887SDavid Hildenbrand } 715f724887SDavid Hildenbrand 72697a45d6SDavid Hildenbrand static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count) 73697a45d6SDavid Hildenbrand { 74697a45d6SDavid Hildenbrand uint64_t tmp; 75697a45d6SDavid Hildenbrand 76697a45d6SDavid Hildenbrand g_assert(count < 128); 77697a45d6SDavid Hildenbrand if (count == 0) { 78697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[0]; 79697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[1]; 80697a45d6SDavid Hildenbrand } else if (count == 64) { 81697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[0]; 82697a45d6SDavid Hildenbrand d->doubleword[0] = 0; 83697a45d6SDavid Hildenbrand } else if (count < 64) { 84697a45d6SDavid Hildenbrand tmp = a->doubleword[1] >> count; 85697a45d6SDavid Hildenbrand d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 86697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[0] >> count; 87697a45d6SDavid Hildenbrand } else { 88697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[0] >> (count - 64); 89697a45d6SDavid Hildenbrand d->doubleword[0] = 0; 90697a45d6SDavid Hildenbrand } 91697a45d6SDavid Hildenbrand } 92c1a81d4bSDavid Hildenbrand #define DEF_VAVG(BITS) \ 93c1a81d4bSDavid Hildenbrand void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \ 94c1a81d4bSDavid Hildenbrand uint32_t desc) \ 95c1a81d4bSDavid Hildenbrand { \ 96c1a81d4bSDavid Hildenbrand int i; \ 97c1a81d4bSDavid Hildenbrand \ 98c1a81d4bSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 99c1a81d4bSDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 100c1a81d4bSDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 101c1a81d4bSDavid Hildenbrand \ 102c1a81d4bSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 103c1a81d4bSDavid Hildenbrand } \ 104c1a81d4bSDavid Hildenbrand } 105c1a81d4bSDavid Hildenbrand DEF_VAVG(8) 106c1a81d4bSDavid Hildenbrand DEF_VAVG(16) 107801aa78bSDavid Hildenbrand 108801aa78bSDavid Hildenbrand #define DEF_VAVGL(BITS) \ 109801aa78bSDavid Hildenbrand void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \ 110801aa78bSDavid Hildenbrand uint32_t desc) \ 111801aa78bSDavid Hildenbrand { \ 112801aa78bSDavid Hildenbrand int i; \ 113801aa78bSDavid Hildenbrand \ 114801aa78bSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 115801aa78bSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 116801aa78bSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 117801aa78bSDavid Hildenbrand \ 118801aa78bSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 119801aa78bSDavid Hildenbrand } \ 120801aa78bSDavid Hildenbrand } 121801aa78bSDavid Hildenbrand DEF_VAVGL(8) 122801aa78bSDavid Hildenbrand DEF_VAVGL(16) 12328863f1dSDavid Hildenbrand 12428863f1dSDavid Hildenbrand #define DEF_VCLZ(BITS) \ 12528863f1dSDavid Hildenbrand void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \ 12628863f1dSDavid Hildenbrand { \ 12728863f1dSDavid Hildenbrand int i; \ 12828863f1dSDavid Hildenbrand \ 12928863f1dSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 13028863f1dSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 13128863f1dSDavid Hildenbrand \ 13228863f1dSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \ 13328863f1dSDavid Hildenbrand } \ 13428863f1dSDavid Hildenbrand } 13528863f1dSDavid Hildenbrand DEF_VCLZ(8) 13628863f1dSDavid Hildenbrand DEF_VCLZ(16) 137449a8ac2SDavid Hildenbrand 138449a8ac2SDavid Hildenbrand #define DEF_VCTZ(BITS) \ 139449a8ac2SDavid Hildenbrand void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \ 140449a8ac2SDavid Hildenbrand { \ 141449a8ac2SDavid Hildenbrand int i; \ 142449a8ac2SDavid Hildenbrand \ 143449a8ac2SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 144449a8ac2SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 145449a8ac2SDavid Hildenbrand \ 146449a8ac2SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \ 147449a8ac2SDavid Hildenbrand } \ 148449a8ac2SDavid Hildenbrand } 149449a8ac2SDavid Hildenbrand DEF_VCTZ(8) 150449a8ac2SDavid Hildenbrand DEF_VCTZ(16) 151697a45d6SDavid Hildenbrand 152697a45d6SDavid Hildenbrand /* like binary multiplication, but XOR instead of addition */ 153697a45d6SDavid Hildenbrand #define DEF_GALOIS_MULTIPLY(BITS, TBITS) \ 154697a45d6SDavid Hildenbrand static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \ 155697a45d6SDavid Hildenbrand uint##TBITS##_t b) \ 156697a45d6SDavid Hildenbrand { \ 157697a45d6SDavid Hildenbrand uint##TBITS##_t res = 0; \ 158697a45d6SDavid Hildenbrand \ 159697a45d6SDavid Hildenbrand while (b) { \ 160697a45d6SDavid Hildenbrand if (b & 0x1) { \ 161697a45d6SDavid Hildenbrand res = res ^ a; \ 162697a45d6SDavid Hildenbrand } \ 163697a45d6SDavid Hildenbrand a = a << 1; \ 164697a45d6SDavid Hildenbrand b = b >> 1; \ 165697a45d6SDavid Hildenbrand } \ 166697a45d6SDavid Hildenbrand return res; \ 167697a45d6SDavid Hildenbrand } 168697a45d6SDavid Hildenbrand DEF_GALOIS_MULTIPLY(8, 16) 169697a45d6SDavid Hildenbrand DEF_GALOIS_MULTIPLY(16, 32) 170697a45d6SDavid Hildenbrand DEF_GALOIS_MULTIPLY(32, 64) 171697a45d6SDavid Hildenbrand 172697a45d6SDavid Hildenbrand static S390Vector galois_multiply64(uint64_t a, uint64_t b) 173697a45d6SDavid Hildenbrand { 174697a45d6SDavid Hildenbrand S390Vector res = {}; 175697a45d6SDavid Hildenbrand S390Vector va = { 176697a45d6SDavid Hildenbrand .doubleword[1] = a, 177697a45d6SDavid Hildenbrand }; 178697a45d6SDavid Hildenbrand S390Vector vb = { 179697a45d6SDavid Hildenbrand .doubleword[1] = b, 180697a45d6SDavid Hildenbrand }; 181697a45d6SDavid Hildenbrand 182697a45d6SDavid Hildenbrand while (!s390_vec_is_zero(&vb)) { 183697a45d6SDavid Hildenbrand if (vb.doubleword[1] & 0x1) { 184697a45d6SDavid Hildenbrand s390_vec_xor(&res, &res, &va); 185697a45d6SDavid Hildenbrand } 186697a45d6SDavid Hildenbrand s390_vec_shl(&va, &va, 1); 187697a45d6SDavid Hildenbrand s390_vec_shr(&vb, &vb, 1); 188697a45d6SDavid Hildenbrand } 189697a45d6SDavid Hildenbrand return res; 190697a45d6SDavid Hildenbrand } 191697a45d6SDavid Hildenbrand 192697a45d6SDavid Hildenbrand #define DEF_VGFM(BITS, TBITS) \ 193697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \ 194697a45d6SDavid Hildenbrand uint32_t desc) \ 195697a45d6SDavid Hildenbrand { \ 196697a45d6SDavid Hildenbrand int i; \ 197697a45d6SDavid Hildenbrand \ 198697a45d6SDavid Hildenbrand for (i = 0; i < (128 / TBITS); i++) { \ 199697a45d6SDavid Hildenbrand uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 200697a45d6SDavid Hildenbrand uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 201697a45d6SDavid Hildenbrand uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 202697a45d6SDavid Hildenbrand \ 203697a45d6SDavid Hildenbrand a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 204697a45d6SDavid Hildenbrand b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 205697a45d6SDavid Hildenbrand d = d ^ galois_multiply32(a, b); \ 206697a45d6SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, d); \ 207697a45d6SDavid Hildenbrand } \ 208697a45d6SDavid Hildenbrand } 209697a45d6SDavid Hildenbrand DEF_VGFM(8, 16) 210697a45d6SDavid Hildenbrand DEF_VGFM(16, 32) 211697a45d6SDavid Hildenbrand DEF_VGFM(32, 64) 212697a45d6SDavid Hildenbrand 213697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3, 214697a45d6SDavid Hildenbrand uint32_t desc) 215697a45d6SDavid Hildenbrand { 216697a45d6SDavid Hildenbrand S390Vector tmp1, tmp2; 217697a45d6SDavid Hildenbrand uint64_t a, b; 218697a45d6SDavid Hildenbrand 219697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 0); 220697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 0); 221697a45d6SDavid Hildenbrand tmp1 = galois_multiply64(a, b); 222697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 1); 223697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 1); 224697a45d6SDavid Hildenbrand tmp2 = galois_multiply64(a, b); 225697a45d6SDavid Hildenbrand s390_vec_xor(v1, &tmp1, &tmp2); 226697a45d6SDavid Hildenbrand } 227697a45d6SDavid Hildenbrand 228697a45d6SDavid Hildenbrand #define DEF_VGFMA(BITS, TBITS) \ 229697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \ 230697a45d6SDavid Hildenbrand const void *v4, uint32_t desc) \ 231697a45d6SDavid Hildenbrand { \ 232697a45d6SDavid Hildenbrand int i; \ 233697a45d6SDavid Hildenbrand \ 234697a45d6SDavid Hildenbrand for (i = 0; i < (128 / TBITS); i++) { \ 235697a45d6SDavid Hildenbrand uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 236697a45d6SDavid Hildenbrand uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 237697a45d6SDavid Hildenbrand uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 238697a45d6SDavid Hildenbrand \ 239697a45d6SDavid Hildenbrand a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 240697a45d6SDavid Hildenbrand b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 241697a45d6SDavid Hildenbrand d = d ^ galois_multiply32(a, b); \ 242697a45d6SDavid Hildenbrand d = d ^ s390_vec_read_element##TBITS(v4, i); \ 243697a45d6SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, d); \ 244697a45d6SDavid Hildenbrand } \ 245697a45d6SDavid Hildenbrand } 246697a45d6SDavid Hildenbrand DEF_VGFMA(8, 16) 247697a45d6SDavid Hildenbrand DEF_VGFMA(16, 32) 248697a45d6SDavid Hildenbrand DEF_VGFMA(32, 64) 249697a45d6SDavid Hildenbrand 250697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3, 251697a45d6SDavid Hildenbrand const void *v4, uint32_t desc) 252697a45d6SDavid Hildenbrand { 253697a45d6SDavid Hildenbrand S390Vector tmp1, tmp2; 254697a45d6SDavid Hildenbrand uint64_t a, b; 255697a45d6SDavid Hildenbrand 256697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 0); 257697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 0); 258697a45d6SDavid Hildenbrand tmp1 = galois_multiply64(a, b); 259697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 1); 260697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 1); 261697a45d6SDavid Hildenbrand tmp2 = galois_multiply64(a, b); 262697a45d6SDavid Hildenbrand s390_vec_xor(&tmp1, &tmp1, &tmp2); 263697a45d6SDavid Hildenbrand s390_vec_xor(v1, &tmp1, v4); 264697a45d6SDavid Hildenbrand } 2651b430aecSDavid Hildenbrand 2661b430aecSDavid Hildenbrand #define DEF_VMAL(BITS) \ 2671b430aecSDavid Hildenbrand void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \ 2681b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 2691b430aecSDavid Hildenbrand { \ 2701b430aecSDavid Hildenbrand int i; \ 2711b430aecSDavid Hildenbrand \ 2721b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 2731b430aecSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 2741b430aecSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 2751b430aecSDavid Hildenbrand const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 2761b430aecSDavid Hildenbrand \ 2771b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a * b + c); \ 2781b430aecSDavid Hildenbrand } \ 2791b430aecSDavid Hildenbrand } 2801b430aecSDavid Hildenbrand DEF_VMAL(8) 2811b430aecSDavid Hildenbrand DEF_VMAL(16) 2821b430aecSDavid Hildenbrand 2831b430aecSDavid Hildenbrand #define DEF_VMAH(BITS) \ 2841b430aecSDavid Hildenbrand void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \ 2851b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 2861b430aecSDavid Hildenbrand { \ 2871b430aecSDavid Hildenbrand int i; \ 2881b430aecSDavid Hildenbrand \ 2891b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 2901b430aecSDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 2911b430aecSDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 2921b430aecSDavid Hildenbrand const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \ 2931b430aecSDavid Hildenbrand \ 2941b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 2951b430aecSDavid Hildenbrand } \ 2961b430aecSDavid Hildenbrand } 2971b430aecSDavid Hildenbrand DEF_VMAH(8) 2981b430aecSDavid Hildenbrand DEF_VMAH(16) 2991b430aecSDavid Hildenbrand 3001b430aecSDavid Hildenbrand #define DEF_VMALH(BITS) \ 3011b430aecSDavid Hildenbrand void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \ 3021b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3031b430aecSDavid Hildenbrand { \ 3041b430aecSDavid Hildenbrand int i; \ 3051b430aecSDavid Hildenbrand \ 3061b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 3071b430aecSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 3081b430aecSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 3091b430aecSDavid Hildenbrand const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 3101b430aecSDavid Hildenbrand \ 3111b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 3121b430aecSDavid Hildenbrand } \ 3131b430aecSDavid Hildenbrand } 3141b430aecSDavid Hildenbrand DEF_VMALH(8) 3151b430aecSDavid Hildenbrand DEF_VMALH(16) 3161b430aecSDavid Hildenbrand 3171b430aecSDavid Hildenbrand #define DEF_VMAE(BITS, TBITS) \ 3181b430aecSDavid Hildenbrand void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \ 3191b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3201b430aecSDavid Hildenbrand { \ 3211b430aecSDavid Hildenbrand int i, j; \ 3221b430aecSDavid Hildenbrand \ 3231b430aecSDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 3241b430aecSDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 3251b430aecSDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 3261b430aecSDavid Hildenbrand int##TBITS##_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, j); \ 3271b430aecSDavid Hildenbrand \ 3281b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 3291b430aecSDavid Hildenbrand } \ 3301b430aecSDavid Hildenbrand } 3311b430aecSDavid Hildenbrand DEF_VMAE(8, 16) 3321b430aecSDavid Hildenbrand DEF_VMAE(16, 32) 3331b430aecSDavid Hildenbrand DEF_VMAE(32, 64) 3341b430aecSDavid Hildenbrand 3351b430aecSDavid Hildenbrand #define DEF_VMALE(BITS, TBITS) \ 3361b430aecSDavid Hildenbrand void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \ 3371b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3381b430aecSDavid Hildenbrand { \ 3391b430aecSDavid Hildenbrand int i, j; \ 3401b430aecSDavid Hildenbrand \ 3411b430aecSDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 3421b430aecSDavid Hildenbrand uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 3431b430aecSDavid Hildenbrand uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 3441b430aecSDavid Hildenbrand uint##TBITS##_t c = s390_vec_read_element##BITS(v4, j); \ 3451b430aecSDavid Hildenbrand \ 3461b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 3471b430aecSDavid Hildenbrand } \ 3481b430aecSDavid Hildenbrand } 3491b430aecSDavid Hildenbrand DEF_VMALE(8, 16) 3501b430aecSDavid Hildenbrand DEF_VMALE(16, 32) 3511b430aecSDavid Hildenbrand DEF_VMALE(32, 64) 3521b430aecSDavid Hildenbrand 3531b430aecSDavid Hildenbrand #define DEF_VMAO(BITS, TBITS) \ 3541b430aecSDavid Hildenbrand void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \ 3551b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3561b430aecSDavid Hildenbrand { \ 3571b430aecSDavid Hildenbrand int i, j; \ 3581b430aecSDavid Hildenbrand \ 3591b430aecSDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 3601b430aecSDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 3611b430aecSDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 3621b430aecSDavid Hildenbrand int##TBITS##_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, j); \ 3631b430aecSDavid Hildenbrand \ 3641b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 3651b430aecSDavid Hildenbrand } \ 3661b430aecSDavid Hildenbrand } 3671b430aecSDavid Hildenbrand DEF_VMAO(8, 16) 3681b430aecSDavid Hildenbrand DEF_VMAO(16, 32) 3691b430aecSDavid Hildenbrand DEF_VMAO(32, 64) 3701b430aecSDavid Hildenbrand 3711b430aecSDavid Hildenbrand #define DEF_VMALO(BITS, TBITS) \ 3721b430aecSDavid Hildenbrand void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \ 3731b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3741b430aecSDavid Hildenbrand { \ 3751b430aecSDavid Hildenbrand int i, j; \ 3761b430aecSDavid Hildenbrand \ 3771b430aecSDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 3781b430aecSDavid Hildenbrand uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 3791b430aecSDavid Hildenbrand uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 3801b430aecSDavid Hildenbrand uint##TBITS##_t c = s390_vec_read_element##BITS(v4, j); \ 3811b430aecSDavid Hildenbrand \ 3821b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 3831b430aecSDavid Hildenbrand } \ 3841b430aecSDavid Hildenbrand } 3851b430aecSDavid Hildenbrand DEF_VMALO(8, 16) 3861b430aecSDavid Hildenbrand DEF_VMALO(16, 32) 3871b430aecSDavid Hildenbrand DEF_VMALO(32, 64) 3882bf3ee38SDavid Hildenbrand 3892bf3ee38SDavid Hildenbrand #define DEF_VMH(BITS) \ 3902bf3ee38SDavid Hildenbrand void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \ 3912bf3ee38SDavid Hildenbrand uint32_t desc) \ 3922bf3ee38SDavid Hildenbrand { \ 3932bf3ee38SDavid Hildenbrand int i; \ 3942bf3ee38SDavid Hildenbrand \ 3952bf3ee38SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 3962bf3ee38SDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 3972bf3ee38SDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 3982bf3ee38SDavid Hildenbrand \ 3992bf3ee38SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 4002bf3ee38SDavid Hildenbrand } \ 4012bf3ee38SDavid Hildenbrand } 4022bf3ee38SDavid Hildenbrand DEF_VMH(8) 4032bf3ee38SDavid Hildenbrand DEF_VMH(16) 4042bf3ee38SDavid Hildenbrand 4052bf3ee38SDavid Hildenbrand #define DEF_VMLH(BITS) \ 4062bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \ 4072bf3ee38SDavid Hildenbrand uint32_t desc) \ 4082bf3ee38SDavid Hildenbrand { \ 4092bf3ee38SDavid Hildenbrand int i; \ 4102bf3ee38SDavid Hildenbrand \ 4112bf3ee38SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 4122bf3ee38SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 4132bf3ee38SDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 4142bf3ee38SDavid Hildenbrand \ 4152bf3ee38SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 4162bf3ee38SDavid Hildenbrand } \ 4172bf3ee38SDavid Hildenbrand } 4182bf3ee38SDavid Hildenbrand DEF_VMLH(8) 4192bf3ee38SDavid Hildenbrand DEF_VMLH(16) 4202bf3ee38SDavid Hildenbrand 4212bf3ee38SDavid Hildenbrand #define DEF_VME(BITS, TBITS) \ 4222bf3ee38SDavid Hildenbrand void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \ 4232bf3ee38SDavid Hildenbrand uint32_t desc) \ 4242bf3ee38SDavid Hildenbrand { \ 4252bf3ee38SDavid Hildenbrand int i, j; \ 4262bf3ee38SDavid Hildenbrand \ 4272bf3ee38SDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 4282bf3ee38SDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 4292bf3ee38SDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 4302bf3ee38SDavid Hildenbrand \ 4312bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 4322bf3ee38SDavid Hildenbrand } \ 4332bf3ee38SDavid Hildenbrand } 4342bf3ee38SDavid Hildenbrand DEF_VME(8, 16) 4352bf3ee38SDavid Hildenbrand DEF_VME(16, 32) 4362bf3ee38SDavid Hildenbrand DEF_VME(32, 64) 4372bf3ee38SDavid Hildenbrand 4382bf3ee38SDavid Hildenbrand #define DEF_VMLE(BITS, TBITS) \ 4392bf3ee38SDavid Hildenbrand void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \ 4402bf3ee38SDavid Hildenbrand uint32_t desc) \ 4412bf3ee38SDavid Hildenbrand { \ 4422bf3ee38SDavid Hildenbrand int i, j; \ 4432bf3ee38SDavid Hildenbrand \ 4442bf3ee38SDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 4452bf3ee38SDavid Hildenbrand const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 4462bf3ee38SDavid Hildenbrand const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 4472bf3ee38SDavid Hildenbrand \ 4482bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 4492bf3ee38SDavid Hildenbrand } \ 4502bf3ee38SDavid Hildenbrand } 4512bf3ee38SDavid Hildenbrand DEF_VMLE(8, 16) 4522bf3ee38SDavid Hildenbrand DEF_VMLE(16, 32) 4532bf3ee38SDavid Hildenbrand DEF_VMLE(32, 64) 4542bf3ee38SDavid Hildenbrand 4552bf3ee38SDavid Hildenbrand #define DEF_VMO(BITS, TBITS) \ 4562bf3ee38SDavid Hildenbrand void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \ 4572bf3ee38SDavid Hildenbrand uint32_t desc) \ 4582bf3ee38SDavid Hildenbrand { \ 4592bf3ee38SDavid Hildenbrand int i, j; \ 4602bf3ee38SDavid Hildenbrand \ 4612bf3ee38SDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 4622bf3ee38SDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 4632bf3ee38SDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 4642bf3ee38SDavid Hildenbrand \ 4652bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 4662bf3ee38SDavid Hildenbrand } \ 4672bf3ee38SDavid Hildenbrand } 4682bf3ee38SDavid Hildenbrand DEF_VMO(8, 16) 4692bf3ee38SDavid Hildenbrand DEF_VMO(16, 32) 4702bf3ee38SDavid Hildenbrand DEF_VMO(32, 64) 4712bf3ee38SDavid Hildenbrand 4722bf3ee38SDavid Hildenbrand #define DEF_VMLO(BITS, TBITS) \ 4732bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \ 4742bf3ee38SDavid Hildenbrand uint32_t desc) \ 4752bf3ee38SDavid Hildenbrand { \ 4762bf3ee38SDavid Hildenbrand int i, j; \ 4772bf3ee38SDavid Hildenbrand \ 4782bf3ee38SDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 4792bf3ee38SDavid Hildenbrand const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 4802bf3ee38SDavid Hildenbrand const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 4812bf3ee38SDavid Hildenbrand \ 4822bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 4832bf3ee38SDavid Hildenbrand } \ 4842bf3ee38SDavid Hildenbrand } 4852bf3ee38SDavid Hildenbrand DEF_VMLO(8, 16) 4862bf3ee38SDavid Hildenbrand DEF_VMLO(16, 32) 4872bf3ee38SDavid Hildenbrand DEF_VMLO(32, 64) 488c3838aaaSDavid Hildenbrand 489c3838aaaSDavid Hildenbrand #define DEF_VPOPCT(BITS) \ 490c3838aaaSDavid Hildenbrand void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \ 491c3838aaaSDavid Hildenbrand { \ 492c3838aaaSDavid Hildenbrand int i; \ 493c3838aaaSDavid Hildenbrand \ 494c3838aaaSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 495c3838aaaSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 496c3838aaaSDavid Hildenbrand \ 497c3838aaaSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, ctpop32(a)); \ 498c3838aaaSDavid Hildenbrand } \ 499c3838aaaSDavid Hildenbrand } 500c3838aaaSDavid Hildenbrand DEF_VPOPCT(8) 501c3838aaaSDavid Hildenbrand DEF_VPOPCT(16) 50255236da2SDavid Hildenbrand 50355236da2SDavid Hildenbrand #define DEF_VERLLV(BITS) \ 50455236da2SDavid Hildenbrand void HELPER(gvec_verllv##BITS)(void *v1, const void *v2, const void *v3, \ 50555236da2SDavid Hildenbrand uint32_t desc) \ 50655236da2SDavid Hildenbrand { \ 50755236da2SDavid Hildenbrand int i; \ 50855236da2SDavid Hildenbrand \ 50955236da2SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 51055236da2SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 51155236da2SDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 51255236da2SDavid Hildenbrand \ 51355236da2SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, rol##BITS(a, b)); \ 51455236da2SDavid Hildenbrand } \ 51555236da2SDavid Hildenbrand } 51655236da2SDavid Hildenbrand DEF_VERLLV(8) 51755236da2SDavid Hildenbrand DEF_VERLLV(16) 51855236da2SDavid Hildenbrand 51955236da2SDavid Hildenbrand #define DEF_VERLL(BITS) \ 52055236da2SDavid Hildenbrand void HELPER(gvec_verll##BITS)(void *v1, const void *v2, uint64_t count, \ 52155236da2SDavid Hildenbrand uint32_t desc) \ 52255236da2SDavid Hildenbrand { \ 52355236da2SDavid Hildenbrand int i; \ 52455236da2SDavid Hildenbrand \ 52555236da2SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 52655236da2SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 52755236da2SDavid Hildenbrand \ 52855236da2SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, rol##BITS(a, count)); \ 52955236da2SDavid Hildenbrand } \ 53055236da2SDavid Hildenbrand } 53155236da2SDavid Hildenbrand DEF_VERLL(8) 53255236da2SDavid Hildenbrand DEF_VERLL(16) 5335c4b0ab4SDavid Hildenbrand 5345c4b0ab4SDavid Hildenbrand #define DEF_VERIM(BITS) \ 5355c4b0ab4SDavid Hildenbrand void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \ 5365c4b0ab4SDavid Hildenbrand uint32_t desc) \ 5375c4b0ab4SDavid Hildenbrand { \ 5385c4b0ab4SDavid Hildenbrand const uint8_t count = simd_data(desc); \ 5395c4b0ab4SDavid Hildenbrand int i; \ 5405c4b0ab4SDavid Hildenbrand \ 5415c4b0ab4SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 5425c4b0ab4SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \ 5435c4b0ab4SDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i); \ 5445c4b0ab4SDavid Hildenbrand const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \ 5455c4b0ab4SDavid Hildenbrand const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \ 5465c4b0ab4SDavid Hildenbrand \ 5475c4b0ab4SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, d); \ 5485c4b0ab4SDavid Hildenbrand } \ 5495c4b0ab4SDavid Hildenbrand } 5505c4b0ab4SDavid Hildenbrand DEF_VERIM(8) 5515c4b0ab4SDavid Hildenbrand DEF_VERIM(16) 552dea33fc3SDavid Hildenbrand 553dea33fc3SDavid Hildenbrand void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count, 554dea33fc3SDavid Hildenbrand uint32_t desc) 555dea33fc3SDavid Hildenbrand { 556dea33fc3SDavid Hildenbrand s390_vec_shl(v1, v2, count); 557dea33fc3SDavid Hildenbrand } 5585f724887SDavid Hildenbrand 5595f724887SDavid Hildenbrand void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count, 5605f724887SDavid Hildenbrand uint32_t desc) 5615f724887SDavid Hildenbrand { 5625f724887SDavid Hildenbrand s390_vec_sar(v1, v2, count); 5635f724887SDavid Hildenbrand } 5648112274fSDavid Hildenbrand 5658112274fSDavid Hildenbrand void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count, 5668112274fSDavid Hildenbrand uint32_t desc) 5678112274fSDavid Hildenbrand { 5688112274fSDavid Hildenbrand s390_vec_shr(v1, v2, count); 5698112274fSDavid Hildenbrand } 570*1ee2d7baSDavid Hildenbrand 571*1ee2d7baSDavid Hildenbrand #define DEF_VSCBI(BITS) \ 572*1ee2d7baSDavid Hildenbrand void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \ 573*1ee2d7baSDavid Hildenbrand uint32_t desc) \ 574*1ee2d7baSDavid Hildenbrand { \ 575*1ee2d7baSDavid Hildenbrand int i; \ 576*1ee2d7baSDavid Hildenbrand \ 577*1ee2d7baSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 578*1ee2d7baSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 579*1ee2d7baSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 580*1ee2d7baSDavid Hildenbrand \ 581*1ee2d7baSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a < b); \ 582*1ee2d7baSDavid Hildenbrand } \ 583*1ee2d7baSDavid Hildenbrand } 584*1ee2d7baSDavid Hildenbrand DEF_VSCBI(8) 585*1ee2d7baSDavid Hildenbrand DEF_VSCBI(16) 586