1c1a81d4bSDavid Hildenbrand /* 2c1a81d4bSDavid Hildenbrand * QEMU TCG support -- s390x vector integer instruction support 3c1a81d4bSDavid Hildenbrand * 4c1a81d4bSDavid Hildenbrand * Copyright (C) 2019 Red Hat Inc 5c1a81d4bSDavid Hildenbrand * 6c1a81d4bSDavid Hildenbrand * Authors: 7c1a81d4bSDavid Hildenbrand * David Hildenbrand <david@redhat.com> 8c1a81d4bSDavid Hildenbrand * 9c1a81d4bSDavid Hildenbrand * This work is licensed under the terms of the GNU GPL, version 2 or later. 10c1a81d4bSDavid Hildenbrand * See the COPYING file in the top-level directory. 11c1a81d4bSDavid Hildenbrand */ 12c1a81d4bSDavid Hildenbrand #include "qemu/osdep.h" 13c1a81d4bSDavid Hildenbrand #include "qemu-common.h" 14c1a81d4bSDavid Hildenbrand #include "cpu.h" 15c1a81d4bSDavid Hildenbrand #include "vec.h" 16c1a81d4bSDavid Hildenbrand #include "exec/helper-proto.h" 17c1a81d4bSDavid Hildenbrand 18697a45d6SDavid Hildenbrand static bool s390_vec_is_zero(const S390Vector *v) 19697a45d6SDavid Hildenbrand { 20697a45d6SDavid Hildenbrand return !v->doubleword[0] && !v->doubleword[1]; 21697a45d6SDavid Hildenbrand } 22697a45d6SDavid Hildenbrand 23697a45d6SDavid Hildenbrand static void s390_vec_xor(S390Vector *res, const S390Vector *a, 24697a45d6SDavid Hildenbrand const S390Vector *b) 25697a45d6SDavid Hildenbrand { 26697a45d6SDavid Hildenbrand res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0]; 27697a45d6SDavid Hildenbrand res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1]; 28697a45d6SDavid Hildenbrand } 29697a45d6SDavid Hildenbrand 30697a45d6SDavid Hildenbrand static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count) 31697a45d6SDavid Hildenbrand { 32697a45d6SDavid Hildenbrand uint64_t tmp; 33697a45d6SDavid Hildenbrand 34697a45d6SDavid Hildenbrand g_assert(count < 128); 35697a45d6SDavid Hildenbrand if (count == 0) { 36697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[0]; 37697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[1]; 38697a45d6SDavid Hildenbrand } else if (count == 64) { 39697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[1]; 40697a45d6SDavid Hildenbrand d->doubleword[1] = 0; 41697a45d6SDavid Hildenbrand } else if (count < 64) { 42697a45d6SDavid Hildenbrand tmp = extract64(a->doubleword[1], 64 - count, count); 43697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[1] << count; 44697a45d6SDavid Hildenbrand d->doubleword[0] = (a->doubleword[0] << count) | tmp; 45697a45d6SDavid Hildenbrand } else { 46697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[1] << (count - 64); 47697a45d6SDavid Hildenbrand d->doubleword[1] = 0; 48697a45d6SDavid Hildenbrand } 49697a45d6SDavid Hildenbrand } 50697a45d6SDavid Hildenbrand 51697a45d6SDavid Hildenbrand static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count) 52697a45d6SDavid Hildenbrand { 53697a45d6SDavid Hildenbrand uint64_t tmp; 54697a45d6SDavid Hildenbrand 55697a45d6SDavid Hildenbrand g_assert(count < 128); 56697a45d6SDavid Hildenbrand if (count == 0) { 57697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[0]; 58697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[1]; 59697a45d6SDavid Hildenbrand } else if (count == 64) { 60697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[0]; 61697a45d6SDavid Hildenbrand d->doubleword[0] = 0; 62697a45d6SDavid Hildenbrand } else if (count < 64) { 63697a45d6SDavid Hildenbrand tmp = a->doubleword[1] >> count; 64697a45d6SDavid Hildenbrand d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 65697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[0] >> count; 66697a45d6SDavid Hildenbrand } else { 67697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[0] >> (count - 64); 68697a45d6SDavid Hildenbrand d->doubleword[0] = 0; 69697a45d6SDavid Hildenbrand } 70697a45d6SDavid Hildenbrand } 71c1a81d4bSDavid Hildenbrand #define DEF_VAVG(BITS) \ 72c1a81d4bSDavid Hildenbrand void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \ 73c1a81d4bSDavid Hildenbrand uint32_t desc) \ 74c1a81d4bSDavid Hildenbrand { \ 75c1a81d4bSDavid Hildenbrand int i; \ 76c1a81d4bSDavid Hildenbrand \ 77c1a81d4bSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 78c1a81d4bSDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 79c1a81d4bSDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 80c1a81d4bSDavid Hildenbrand \ 81c1a81d4bSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 82c1a81d4bSDavid Hildenbrand } \ 83c1a81d4bSDavid Hildenbrand } 84c1a81d4bSDavid Hildenbrand DEF_VAVG(8) 85c1a81d4bSDavid Hildenbrand DEF_VAVG(16) 86801aa78bSDavid Hildenbrand 87801aa78bSDavid Hildenbrand #define DEF_VAVGL(BITS) \ 88801aa78bSDavid Hildenbrand void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \ 89801aa78bSDavid Hildenbrand uint32_t desc) \ 90801aa78bSDavid Hildenbrand { \ 91801aa78bSDavid Hildenbrand int i; \ 92801aa78bSDavid Hildenbrand \ 93801aa78bSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 94801aa78bSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 95801aa78bSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 96801aa78bSDavid Hildenbrand \ 97801aa78bSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 98801aa78bSDavid Hildenbrand } \ 99801aa78bSDavid Hildenbrand } 100801aa78bSDavid Hildenbrand DEF_VAVGL(8) 101801aa78bSDavid Hildenbrand DEF_VAVGL(16) 10228863f1dSDavid Hildenbrand 10328863f1dSDavid Hildenbrand #define DEF_VCLZ(BITS) \ 10428863f1dSDavid Hildenbrand void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \ 10528863f1dSDavid Hildenbrand { \ 10628863f1dSDavid Hildenbrand int i; \ 10728863f1dSDavid Hildenbrand \ 10828863f1dSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 10928863f1dSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 11028863f1dSDavid Hildenbrand \ 11128863f1dSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \ 11228863f1dSDavid Hildenbrand } \ 11328863f1dSDavid Hildenbrand } 11428863f1dSDavid Hildenbrand DEF_VCLZ(8) 11528863f1dSDavid Hildenbrand DEF_VCLZ(16) 116449a8ac2SDavid Hildenbrand 117449a8ac2SDavid Hildenbrand #define DEF_VCTZ(BITS) \ 118449a8ac2SDavid Hildenbrand void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \ 119449a8ac2SDavid Hildenbrand { \ 120449a8ac2SDavid Hildenbrand int i; \ 121449a8ac2SDavid Hildenbrand \ 122449a8ac2SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 123449a8ac2SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 124449a8ac2SDavid Hildenbrand \ 125449a8ac2SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \ 126449a8ac2SDavid Hildenbrand } \ 127449a8ac2SDavid Hildenbrand } 128449a8ac2SDavid Hildenbrand DEF_VCTZ(8) 129449a8ac2SDavid Hildenbrand DEF_VCTZ(16) 130697a45d6SDavid Hildenbrand 131697a45d6SDavid Hildenbrand /* like binary multiplication, but XOR instead of addition */ 132697a45d6SDavid Hildenbrand #define DEF_GALOIS_MULTIPLY(BITS, TBITS) \ 133697a45d6SDavid Hildenbrand static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \ 134697a45d6SDavid Hildenbrand uint##TBITS##_t b) \ 135697a45d6SDavid Hildenbrand { \ 136697a45d6SDavid Hildenbrand uint##TBITS##_t res = 0; \ 137697a45d6SDavid Hildenbrand \ 138697a45d6SDavid Hildenbrand while (b) { \ 139697a45d6SDavid Hildenbrand if (b & 0x1) { \ 140697a45d6SDavid Hildenbrand res = res ^ a; \ 141697a45d6SDavid Hildenbrand } \ 142697a45d6SDavid Hildenbrand a = a << 1; \ 143697a45d6SDavid Hildenbrand b = b >> 1; \ 144697a45d6SDavid Hildenbrand } \ 145697a45d6SDavid Hildenbrand return res; \ 146697a45d6SDavid Hildenbrand } 147697a45d6SDavid Hildenbrand DEF_GALOIS_MULTIPLY(8, 16) 148697a45d6SDavid Hildenbrand DEF_GALOIS_MULTIPLY(16, 32) 149697a45d6SDavid Hildenbrand DEF_GALOIS_MULTIPLY(32, 64) 150697a45d6SDavid Hildenbrand 151697a45d6SDavid Hildenbrand static S390Vector galois_multiply64(uint64_t a, uint64_t b) 152697a45d6SDavid Hildenbrand { 153697a45d6SDavid Hildenbrand S390Vector res = {}; 154697a45d6SDavid Hildenbrand S390Vector va = { 155697a45d6SDavid Hildenbrand .doubleword[1] = a, 156697a45d6SDavid Hildenbrand }; 157697a45d6SDavid Hildenbrand S390Vector vb = { 158697a45d6SDavid Hildenbrand .doubleword[1] = b, 159697a45d6SDavid Hildenbrand }; 160697a45d6SDavid Hildenbrand 161697a45d6SDavid Hildenbrand while (!s390_vec_is_zero(&vb)) { 162697a45d6SDavid Hildenbrand if (vb.doubleword[1] & 0x1) { 163697a45d6SDavid Hildenbrand s390_vec_xor(&res, &res, &va); 164697a45d6SDavid Hildenbrand } 165697a45d6SDavid Hildenbrand s390_vec_shl(&va, &va, 1); 166697a45d6SDavid Hildenbrand s390_vec_shr(&vb, &vb, 1); 167697a45d6SDavid Hildenbrand } 168697a45d6SDavid Hildenbrand return res; 169697a45d6SDavid Hildenbrand } 170697a45d6SDavid Hildenbrand 171697a45d6SDavid Hildenbrand #define DEF_VGFM(BITS, TBITS) \ 172697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \ 173697a45d6SDavid Hildenbrand uint32_t desc) \ 174697a45d6SDavid Hildenbrand { \ 175697a45d6SDavid Hildenbrand int i; \ 176697a45d6SDavid Hildenbrand \ 177697a45d6SDavid Hildenbrand for (i = 0; i < (128 / TBITS); i++) { \ 178697a45d6SDavid Hildenbrand uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 179697a45d6SDavid Hildenbrand uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 180697a45d6SDavid Hildenbrand uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 181697a45d6SDavid Hildenbrand \ 182697a45d6SDavid Hildenbrand a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 183697a45d6SDavid Hildenbrand b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 184697a45d6SDavid Hildenbrand d = d ^ galois_multiply32(a, b); \ 185697a45d6SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, d); \ 186697a45d6SDavid Hildenbrand } \ 187697a45d6SDavid Hildenbrand } 188697a45d6SDavid Hildenbrand DEF_VGFM(8, 16) 189697a45d6SDavid Hildenbrand DEF_VGFM(16, 32) 190697a45d6SDavid Hildenbrand DEF_VGFM(32, 64) 191697a45d6SDavid Hildenbrand 192697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3, 193697a45d6SDavid Hildenbrand uint32_t desc) 194697a45d6SDavid Hildenbrand { 195697a45d6SDavid Hildenbrand S390Vector tmp1, tmp2; 196697a45d6SDavid Hildenbrand uint64_t a, b; 197697a45d6SDavid Hildenbrand 198697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 0); 199697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 0); 200697a45d6SDavid Hildenbrand tmp1 = galois_multiply64(a, b); 201697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 1); 202697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 1); 203697a45d6SDavid Hildenbrand tmp2 = galois_multiply64(a, b); 204697a45d6SDavid Hildenbrand s390_vec_xor(v1, &tmp1, &tmp2); 205697a45d6SDavid Hildenbrand } 206697a45d6SDavid Hildenbrand 207697a45d6SDavid Hildenbrand #define DEF_VGFMA(BITS, TBITS) \ 208697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \ 209697a45d6SDavid Hildenbrand const void *v4, uint32_t desc) \ 210697a45d6SDavid Hildenbrand { \ 211697a45d6SDavid Hildenbrand int i; \ 212697a45d6SDavid Hildenbrand \ 213697a45d6SDavid Hildenbrand for (i = 0; i < (128 / TBITS); i++) { \ 214697a45d6SDavid Hildenbrand uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 215697a45d6SDavid Hildenbrand uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 216697a45d6SDavid Hildenbrand uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 217697a45d6SDavid Hildenbrand \ 218697a45d6SDavid Hildenbrand a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 219697a45d6SDavid Hildenbrand b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 220697a45d6SDavid Hildenbrand d = d ^ galois_multiply32(a, b); \ 221697a45d6SDavid Hildenbrand d = d ^ s390_vec_read_element##TBITS(v4, i); \ 222697a45d6SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, d); \ 223697a45d6SDavid Hildenbrand } \ 224697a45d6SDavid Hildenbrand } 225697a45d6SDavid Hildenbrand DEF_VGFMA(8, 16) 226697a45d6SDavid Hildenbrand DEF_VGFMA(16, 32) 227697a45d6SDavid Hildenbrand DEF_VGFMA(32, 64) 228697a45d6SDavid Hildenbrand 229697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3, 230697a45d6SDavid Hildenbrand const void *v4, uint32_t desc) 231697a45d6SDavid Hildenbrand { 232697a45d6SDavid Hildenbrand S390Vector tmp1, tmp2; 233697a45d6SDavid Hildenbrand uint64_t a, b; 234697a45d6SDavid Hildenbrand 235697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 0); 236697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 0); 237697a45d6SDavid Hildenbrand tmp1 = galois_multiply64(a, b); 238697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 1); 239697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 1); 240697a45d6SDavid Hildenbrand tmp2 = galois_multiply64(a, b); 241697a45d6SDavid Hildenbrand s390_vec_xor(&tmp1, &tmp1, &tmp2); 242697a45d6SDavid Hildenbrand s390_vec_xor(v1, &tmp1, v4); 243697a45d6SDavid Hildenbrand } 2441b430aecSDavid Hildenbrand 2451b430aecSDavid Hildenbrand #define DEF_VMAL(BITS) \ 2461b430aecSDavid Hildenbrand void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \ 2471b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 2481b430aecSDavid Hildenbrand { \ 2491b430aecSDavid Hildenbrand int i; \ 2501b430aecSDavid Hildenbrand \ 2511b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 2521b430aecSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 2531b430aecSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 2541b430aecSDavid Hildenbrand const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 2551b430aecSDavid Hildenbrand \ 2561b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a * b + c); \ 2571b430aecSDavid Hildenbrand } \ 2581b430aecSDavid Hildenbrand } 2591b430aecSDavid Hildenbrand DEF_VMAL(8) 2601b430aecSDavid Hildenbrand DEF_VMAL(16) 2611b430aecSDavid Hildenbrand 2621b430aecSDavid Hildenbrand #define DEF_VMAH(BITS) \ 2631b430aecSDavid Hildenbrand void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \ 2641b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 2651b430aecSDavid Hildenbrand { \ 2661b430aecSDavid Hildenbrand int i; \ 2671b430aecSDavid Hildenbrand \ 2681b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 2691b430aecSDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 2701b430aecSDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 2711b430aecSDavid Hildenbrand const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \ 2721b430aecSDavid Hildenbrand \ 2731b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 2741b430aecSDavid Hildenbrand } \ 2751b430aecSDavid Hildenbrand } 2761b430aecSDavid Hildenbrand DEF_VMAH(8) 2771b430aecSDavid Hildenbrand DEF_VMAH(16) 2781b430aecSDavid Hildenbrand 2791b430aecSDavid Hildenbrand #define DEF_VMALH(BITS) \ 2801b430aecSDavid Hildenbrand void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \ 2811b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 2821b430aecSDavid Hildenbrand { \ 2831b430aecSDavid Hildenbrand int i; \ 2841b430aecSDavid Hildenbrand \ 2851b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 2861b430aecSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 2871b430aecSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 2881b430aecSDavid Hildenbrand const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 2891b430aecSDavid Hildenbrand \ 2901b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 2911b430aecSDavid Hildenbrand } \ 2921b430aecSDavid Hildenbrand } 2931b430aecSDavid Hildenbrand DEF_VMALH(8) 2941b430aecSDavid Hildenbrand DEF_VMALH(16) 2951b430aecSDavid Hildenbrand 2961b430aecSDavid Hildenbrand #define DEF_VMAE(BITS, TBITS) \ 2971b430aecSDavid Hildenbrand void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \ 2981b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 2991b430aecSDavid Hildenbrand { \ 3001b430aecSDavid Hildenbrand int i, j; \ 3011b430aecSDavid Hildenbrand \ 3021b430aecSDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 3031b430aecSDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 3041b430aecSDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 3051b430aecSDavid Hildenbrand int##TBITS##_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, j); \ 3061b430aecSDavid Hildenbrand \ 3071b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 3081b430aecSDavid Hildenbrand } \ 3091b430aecSDavid Hildenbrand } 3101b430aecSDavid Hildenbrand DEF_VMAE(8, 16) 3111b430aecSDavid Hildenbrand DEF_VMAE(16, 32) 3121b430aecSDavid Hildenbrand DEF_VMAE(32, 64) 3131b430aecSDavid Hildenbrand 3141b430aecSDavid Hildenbrand #define DEF_VMALE(BITS, TBITS) \ 3151b430aecSDavid Hildenbrand void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \ 3161b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3171b430aecSDavid Hildenbrand { \ 3181b430aecSDavid Hildenbrand int i, j; \ 3191b430aecSDavid Hildenbrand \ 3201b430aecSDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 3211b430aecSDavid Hildenbrand uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 3221b430aecSDavid Hildenbrand uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 3231b430aecSDavid Hildenbrand uint##TBITS##_t c = s390_vec_read_element##BITS(v4, j); \ 3241b430aecSDavid Hildenbrand \ 3251b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 3261b430aecSDavid Hildenbrand } \ 3271b430aecSDavid Hildenbrand } 3281b430aecSDavid Hildenbrand DEF_VMALE(8, 16) 3291b430aecSDavid Hildenbrand DEF_VMALE(16, 32) 3301b430aecSDavid Hildenbrand DEF_VMALE(32, 64) 3311b430aecSDavid Hildenbrand 3321b430aecSDavid Hildenbrand #define DEF_VMAO(BITS, TBITS) \ 3331b430aecSDavid Hildenbrand void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \ 3341b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3351b430aecSDavid Hildenbrand { \ 3361b430aecSDavid Hildenbrand int i, j; \ 3371b430aecSDavid Hildenbrand \ 3381b430aecSDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 3391b430aecSDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 3401b430aecSDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 3411b430aecSDavid Hildenbrand int##TBITS##_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, j); \ 3421b430aecSDavid Hildenbrand \ 3431b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 3441b430aecSDavid Hildenbrand } \ 3451b430aecSDavid Hildenbrand } 3461b430aecSDavid Hildenbrand DEF_VMAO(8, 16) 3471b430aecSDavid Hildenbrand DEF_VMAO(16, 32) 3481b430aecSDavid Hildenbrand DEF_VMAO(32, 64) 3491b430aecSDavid Hildenbrand 3501b430aecSDavid Hildenbrand #define DEF_VMALO(BITS, TBITS) \ 3511b430aecSDavid Hildenbrand void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \ 3521b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3531b430aecSDavid Hildenbrand { \ 3541b430aecSDavid Hildenbrand int i, j; \ 3551b430aecSDavid Hildenbrand \ 3561b430aecSDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 3571b430aecSDavid Hildenbrand uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 3581b430aecSDavid Hildenbrand uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 3591b430aecSDavid Hildenbrand uint##TBITS##_t c = s390_vec_read_element##BITS(v4, j); \ 3601b430aecSDavid Hildenbrand \ 3611b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 3621b430aecSDavid Hildenbrand } \ 3631b430aecSDavid Hildenbrand } 3641b430aecSDavid Hildenbrand DEF_VMALO(8, 16) 3651b430aecSDavid Hildenbrand DEF_VMALO(16, 32) 3661b430aecSDavid Hildenbrand DEF_VMALO(32, 64) 367*2bf3ee38SDavid Hildenbrand 368*2bf3ee38SDavid Hildenbrand #define DEF_VMH(BITS) \ 369*2bf3ee38SDavid Hildenbrand void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \ 370*2bf3ee38SDavid Hildenbrand uint32_t desc) \ 371*2bf3ee38SDavid Hildenbrand { \ 372*2bf3ee38SDavid Hildenbrand int i; \ 373*2bf3ee38SDavid Hildenbrand \ 374*2bf3ee38SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 375*2bf3ee38SDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 376*2bf3ee38SDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 377*2bf3ee38SDavid Hildenbrand \ 378*2bf3ee38SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 379*2bf3ee38SDavid Hildenbrand } \ 380*2bf3ee38SDavid Hildenbrand } 381*2bf3ee38SDavid Hildenbrand DEF_VMH(8) 382*2bf3ee38SDavid Hildenbrand DEF_VMH(16) 383*2bf3ee38SDavid Hildenbrand 384*2bf3ee38SDavid Hildenbrand #define DEF_VMLH(BITS) \ 385*2bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \ 386*2bf3ee38SDavid Hildenbrand uint32_t desc) \ 387*2bf3ee38SDavid Hildenbrand { \ 388*2bf3ee38SDavid Hildenbrand int i; \ 389*2bf3ee38SDavid Hildenbrand \ 390*2bf3ee38SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 391*2bf3ee38SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 392*2bf3ee38SDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 393*2bf3ee38SDavid Hildenbrand \ 394*2bf3ee38SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 395*2bf3ee38SDavid Hildenbrand } \ 396*2bf3ee38SDavid Hildenbrand } 397*2bf3ee38SDavid Hildenbrand DEF_VMLH(8) 398*2bf3ee38SDavid Hildenbrand DEF_VMLH(16) 399*2bf3ee38SDavid Hildenbrand 400*2bf3ee38SDavid Hildenbrand #define DEF_VME(BITS, TBITS) \ 401*2bf3ee38SDavid Hildenbrand void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \ 402*2bf3ee38SDavid Hildenbrand uint32_t desc) \ 403*2bf3ee38SDavid Hildenbrand { \ 404*2bf3ee38SDavid Hildenbrand int i, j; \ 405*2bf3ee38SDavid Hildenbrand \ 406*2bf3ee38SDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 407*2bf3ee38SDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 408*2bf3ee38SDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 409*2bf3ee38SDavid Hildenbrand \ 410*2bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 411*2bf3ee38SDavid Hildenbrand } \ 412*2bf3ee38SDavid Hildenbrand } 413*2bf3ee38SDavid Hildenbrand DEF_VME(8, 16) 414*2bf3ee38SDavid Hildenbrand DEF_VME(16, 32) 415*2bf3ee38SDavid Hildenbrand DEF_VME(32, 64) 416*2bf3ee38SDavid Hildenbrand 417*2bf3ee38SDavid Hildenbrand #define DEF_VMLE(BITS, TBITS) \ 418*2bf3ee38SDavid Hildenbrand void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \ 419*2bf3ee38SDavid Hildenbrand uint32_t desc) \ 420*2bf3ee38SDavid Hildenbrand { \ 421*2bf3ee38SDavid Hildenbrand int i, j; \ 422*2bf3ee38SDavid Hildenbrand \ 423*2bf3ee38SDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 424*2bf3ee38SDavid Hildenbrand const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 425*2bf3ee38SDavid Hildenbrand const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 426*2bf3ee38SDavid Hildenbrand \ 427*2bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 428*2bf3ee38SDavid Hildenbrand } \ 429*2bf3ee38SDavid Hildenbrand } 430*2bf3ee38SDavid Hildenbrand DEF_VMLE(8, 16) 431*2bf3ee38SDavid Hildenbrand DEF_VMLE(16, 32) 432*2bf3ee38SDavid Hildenbrand DEF_VMLE(32, 64) 433*2bf3ee38SDavid Hildenbrand 434*2bf3ee38SDavid Hildenbrand #define DEF_VMO(BITS, TBITS) \ 435*2bf3ee38SDavid Hildenbrand void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \ 436*2bf3ee38SDavid Hildenbrand uint32_t desc) \ 437*2bf3ee38SDavid Hildenbrand { \ 438*2bf3ee38SDavid Hildenbrand int i, j; \ 439*2bf3ee38SDavid Hildenbrand \ 440*2bf3ee38SDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 441*2bf3ee38SDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 442*2bf3ee38SDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 443*2bf3ee38SDavid Hildenbrand \ 444*2bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 445*2bf3ee38SDavid Hildenbrand } \ 446*2bf3ee38SDavid Hildenbrand } 447*2bf3ee38SDavid Hildenbrand DEF_VMO(8, 16) 448*2bf3ee38SDavid Hildenbrand DEF_VMO(16, 32) 449*2bf3ee38SDavid Hildenbrand DEF_VMO(32, 64) 450*2bf3ee38SDavid Hildenbrand 451*2bf3ee38SDavid Hildenbrand #define DEF_VMLO(BITS, TBITS) \ 452*2bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \ 453*2bf3ee38SDavid Hildenbrand uint32_t desc) \ 454*2bf3ee38SDavid Hildenbrand { \ 455*2bf3ee38SDavid Hildenbrand int i, j; \ 456*2bf3ee38SDavid Hildenbrand \ 457*2bf3ee38SDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 458*2bf3ee38SDavid Hildenbrand const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 459*2bf3ee38SDavid Hildenbrand const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 460*2bf3ee38SDavid Hildenbrand \ 461*2bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 462*2bf3ee38SDavid Hildenbrand } \ 463*2bf3ee38SDavid Hildenbrand } 464*2bf3ee38SDavid Hildenbrand DEF_VMLO(8, 16) 465*2bf3ee38SDavid Hildenbrand DEF_VMLO(16, 32) 466*2bf3ee38SDavid Hildenbrand DEF_VMLO(32, 64) 467