1c1a81d4bSDavid Hildenbrand /* 2c1a81d4bSDavid Hildenbrand * QEMU TCG support -- s390x vector integer instruction support 3c1a81d4bSDavid Hildenbrand * 4c1a81d4bSDavid Hildenbrand * Copyright (C) 2019 Red Hat Inc 5c1a81d4bSDavid Hildenbrand * 6c1a81d4bSDavid Hildenbrand * Authors: 7c1a81d4bSDavid Hildenbrand * David Hildenbrand <david@redhat.com> 8c1a81d4bSDavid Hildenbrand * 9c1a81d4bSDavid Hildenbrand * This work is licensed under the terms of the GNU GPL, version 2 or later. 10c1a81d4bSDavid Hildenbrand * See the COPYING file in the top-level directory. 11c1a81d4bSDavid Hildenbrand */ 12c1a81d4bSDavid Hildenbrand #include "qemu/osdep.h" 13c1a81d4bSDavid Hildenbrand #include "qemu-common.h" 14c1a81d4bSDavid Hildenbrand #include "cpu.h" 15c1a81d4bSDavid Hildenbrand #include "vec.h" 16c1a81d4bSDavid Hildenbrand #include "exec/helper-proto.h" 175c4b0ab4SDavid Hildenbrand #include "tcg/tcg-gvec-desc.h" 18c1a81d4bSDavid Hildenbrand 19697a45d6SDavid Hildenbrand static bool s390_vec_is_zero(const S390Vector *v) 20697a45d6SDavid Hildenbrand { 21697a45d6SDavid Hildenbrand return !v->doubleword[0] && !v->doubleword[1]; 22697a45d6SDavid Hildenbrand } 23697a45d6SDavid Hildenbrand 24697a45d6SDavid Hildenbrand static void s390_vec_xor(S390Vector *res, const S390Vector *a, 25697a45d6SDavid Hildenbrand const S390Vector *b) 26697a45d6SDavid Hildenbrand { 27697a45d6SDavid Hildenbrand res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0]; 28697a45d6SDavid Hildenbrand res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1]; 29697a45d6SDavid Hildenbrand } 30697a45d6SDavid Hildenbrand 31db156ebfSDavid Hildenbrand static void s390_vec_and(S390Vector *res, const S390Vector *a, 32db156ebfSDavid Hildenbrand const S390Vector *b) 33db156ebfSDavid Hildenbrand { 34db156ebfSDavid Hildenbrand res->doubleword[0] = a->doubleword[0] & b->doubleword[0]; 35db156ebfSDavid Hildenbrand res->doubleword[1] = a->doubleword[1] & b->doubleword[1]; 36db156ebfSDavid Hildenbrand } 37db156ebfSDavid Hildenbrand 38db156ebfSDavid Hildenbrand static bool s390_vec_equal(const S390Vector *a, const S390Vector *b) 39db156ebfSDavid Hildenbrand { 40db156ebfSDavid Hildenbrand return a->doubleword[0] == b->doubleword[0] && 41db156ebfSDavid Hildenbrand a->doubleword[1] == b->doubleword[1]; 42db156ebfSDavid Hildenbrand } 43db156ebfSDavid Hildenbrand 44697a45d6SDavid Hildenbrand static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count) 45697a45d6SDavid Hildenbrand { 46697a45d6SDavid Hildenbrand uint64_t tmp; 47697a45d6SDavid Hildenbrand 48697a45d6SDavid Hildenbrand g_assert(count < 128); 49697a45d6SDavid Hildenbrand if (count == 0) { 50697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[0]; 51697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[1]; 52697a45d6SDavid Hildenbrand } else if (count == 64) { 53697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[1]; 54697a45d6SDavid Hildenbrand d->doubleword[1] = 0; 55697a45d6SDavid Hildenbrand } else if (count < 64) { 56697a45d6SDavid Hildenbrand tmp = extract64(a->doubleword[1], 64 - count, count); 57697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[1] << count; 58697a45d6SDavid Hildenbrand d->doubleword[0] = (a->doubleword[0] << count) | tmp; 59697a45d6SDavid Hildenbrand } else { 60697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[1] << (count - 64); 61697a45d6SDavid Hildenbrand d->doubleword[1] = 0; 62697a45d6SDavid Hildenbrand } 63697a45d6SDavid Hildenbrand } 64697a45d6SDavid Hildenbrand 655f724887SDavid Hildenbrand static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count) 665f724887SDavid Hildenbrand { 675f724887SDavid Hildenbrand uint64_t tmp; 685f724887SDavid Hildenbrand 695f724887SDavid Hildenbrand if (count == 0) { 705f724887SDavid Hildenbrand d->doubleword[0] = a->doubleword[0]; 715f724887SDavid Hildenbrand d->doubleword[1] = a->doubleword[1]; 725f724887SDavid Hildenbrand } else if (count == 64) { 73*b57b3368SDavid Hildenbrand tmp = (int64_t)a->doubleword[0] >> 63; 745f724887SDavid Hildenbrand d->doubleword[1] = a->doubleword[0]; 75*b57b3368SDavid Hildenbrand d->doubleword[0] = tmp; 765f724887SDavid Hildenbrand } else if (count < 64) { 775f724887SDavid Hildenbrand tmp = a->doubleword[1] >> count; 785f724887SDavid Hildenbrand d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 795f724887SDavid Hildenbrand d->doubleword[0] = (int64_t)a->doubleword[0] >> count; 805f724887SDavid Hildenbrand } else { 81*b57b3368SDavid Hildenbrand tmp = (int64_t)a->doubleword[0] >> 63; 825f724887SDavid Hildenbrand d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64); 83*b57b3368SDavid Hildenbrand d->doubleword[0] = tmp; 845f724887SDavid Hildenbrand } 855f724887SDavid Hildenbrand } 865f724887SDavid Hildenbrand 87697a45d6SDavid Hildenbrand static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count) 88697a45d6SDavid Hildenbrand { 89697a45d6SDavid Hildenbrand uint64_t tmp; 90697a45d6SDavid Hildenbrand 91697a45d6SDavid Hildenbrand g_assert(count < 128); 92697a45d6SDavid Hildenbrand if (count == 0) { 93697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[0]; 94697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[1]; 95697a45d6SDavid Hildenbrand } else if (count == 64) { 96697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[0]; 97697a45d6SDavid Hildenbrand d->doubleword[0] = 0; 98697a45d6SDavid Hildenbrand } else if (count < 64) { 99697a45d6SDavid Hildenbrand tmp = a->doubleword[1] >> count; 100697a45d6SDavid Hildenbrand d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 101697a45d6SDavid Hildenbrand d->doubleword[0] = a->doubleword[0] >> count; 102697a45d6SDavid Hildenbrand } else { 103697a45d6SDavid Hildenbrand d->doubleword[1] = a->doubleword[0] >> (count - 64); 104697a45d6SDavid Hildenbrand d->doubleword[0] = 0; 105697a45d6SDavid Hildenbrand } 106697a45d6SDavid Hildenbrand } 107c1a81d4bSDavid Hildenbrand #define DEF_VAVG(BITS) \ 108c1a81d4bSDavid Hildenbrand void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \ 109c1a81d4bSDavid Hildenbrand uint32_t desc) \ 110c1a81d4bSDavid Hildenbrand { \ 111c1a81d4bSDavid Hildenbrand int i; \ 112c1a81d4bSDavid Hildenbrand \ 113c1a81d4bSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 114c1a81d4bSDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 115c1a81d4bSDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 116c1a81d4bSDavid Hildenbrand \ 117c1a81d4bSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 118c1a81d4bSDavid Hildenbrand } \ 119c1a81d4bSDavid Hildenbrand } 120c1a81d4bSDavid Hildenbrand DEF_VAVG(8) 121c1a81d4bSDavid Hildenbrand DEF_VAVG(16) 122801aa78bSDavid Hildenbrand 123801aa78bSDavid Hildenbrand #define DEF_VAVGL(BITS) \ 124801aa78bSDavid Hildenbrand void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \ 125801aa78bSDavid Hildenbrand uint32_t desc) \ 126801aa78bSDavid Hildenbrand { \ 127801aa78bSDavid Hildenbrand int i; \ 128801aa78bSDavid Hildenbrand \ 129801aa78bSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 130801aa78bSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 131801aa78bSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 132801aa78bSDavid Hildenbrand \ 133801aa78bSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 134801aa78bSDavid Hildenbrand } \ 135801aa78bSDavid Hildenbrand } 136801aa78bSDavid Hildenbrand DEF_VAVGL(8) 137801aa78bSDavid Hildenbrand DEF_VAVGL(16) 13828863f1dSDavid Hildenbrand 13928863f1dSDavid Hildenbrand #define DEF_VCLZ(BITS) \ 14028863f1dSDavid Hildenbrand void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \ 14128863f1dSDavid Hildenbrand { \ 14228863f1dSDavid Hildenbrand int i; \ 14328863f1dSDavid Hildenbrand \ 14428863f1dSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 14528863f1dSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 14628863f1dSDavid Hildenbrand \ 14728863f1dSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \ 14828863f1dSDavid Hildenbrand } \ 14928863f1dSDavid Hildenbrand } 15028863f1dSDavid Hildenbrand DEF_VCLZ(8) 15128863f1dSDavid Hildenbrand DEF_VCLZ(16) 152449a8ac2SDavid Hildenbrand 153449a8ac2SDavid Hildenbrand #define DEF_VCTZ(BITS) \ 154449a8ac2SDavid Hildenbrand void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \ 155449a8ac2SDavid Hildenbrand { \ 156449a8ac2SDavid Hildenbrand int i; \ 157449a8ac2SDavid Hildenbrand \ 158449a8ac2SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 159449a8ac2SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 160449a8ac2SDavid Hildenbrand \ 161449a8ac2SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \ 162449a8ac2SDavid Hildenbrand } \ 163449a8ac2SDavid Hildenbrand } 164449a8ac2SDavid Hildenbrand DEF_VCTZ(8) 165449a8ac2SDavid Hildenbrand DEF_VCTZ(16) 166697a45d6SDavid Hildenbrand 167697a45d6SDavid Hildenbrand /* like binary multiplication, but XOR instead of addition */ 168697a45d6SDavid Hildenbrand #define DEF_GALOIS_MULTIPLY(BITS, TBITS) \ 169697a45d6SDavid Hildenbrand static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \ 170697a45d6SDavid Hildenbrand uint##TBITS##_t b) \ 171697a45d6SDavid Hildenbrand { \ 172697a45d6SDavid Hildenbrand uint##TBITS##_t res = 0; \ 173697a45d6SDavid Hildenbrand \ 174697a45d6SDavid Hildenbrand while (b) { \ 175697a45d6SDavid Hildenbrand if (b & 0x1) { \ 176697a45d6SDavid Hildenbrand res = res ^ a; \ 177697a45d6SDavid Hildenbrand } \ 178697a45d6SDavid Hildenbrand a = a << 1; \ 179697a45d6SDavid Hildenbrand b = b >> 1; \ 180697a45d6SDavid Hildenbrand } \ 181697a45d6SDavid Hildenbrand return res; \ 182697a45d6SDavid Hildenbrand } 183697a45d6SDavid Hildenbrand DEF_GALOIS_MULTIPLY(8, 16) 184697a45d6SDavid Hildenbrand DEF_GALOIS_MULTIPLY(16, 32) 185697a45d6SDavid Hildenbrand DEF_GALOIS_MULTIPLY(32, 64) 186697a45d6SDavid Hildenbrand 187697a45d6SDavid Hildenbrand static S390Vector galois_multiply64(uint64_t a, uint64_t b) 188697a45d6SDavid Hildenbrand { 189697a45d6SDavid Hildenbrand S390Vector res = {}; 190697a45d6SDavid Hildenbrand S390Vector va = { 191697a45d6SDavid Hildenbrand .doubleword[1] = a, 192697a45d6SDavid Hildenbrand }; 193697a45d6SDavid Hildenbrand S390Vector vb = { 194697a45d6SDavid Hildenbrand .doubleword[1] = b, 195697a45d6SDavid Hildenbrand }; 196697a45d6SDavid Hildenbrand 197697a45d6SDavid Hildenbrand while (!s390_vec_is_zero(&vb)) { 198697a45d6SDavid Hildenbrand if (vb.doubleword[1] & 0x1) { 199697a45d6SDavid Hildenbrand s390_vec_xor(&res, &res, &va); 200697a45d6SDavid Hildenbrand } 201697a45d6SDavid Hildenbrand s390_vec_shl(&va, &va, 1); 202697a45d6SDavid Hildenbrand s390_vec_shr(&vb, &vb, 1); 203697a45d6SDavid Hildenbrand } 204697a45d6SDavid Hildenbrand return res; 205697a45d6SDavid Hildenbrand } 206697a45d6SDavid Hildenbrand 207697a45d6SDavid Hildenbrand #define DEF_VGFM(BITS, TBITS) \ 208697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \ 209697a45d6SDavid Hildenbrand uint32_t desc) \ 210697a45d6SDavid Hildenbrand { \ 211697a45d6SDavid Hildenbrand int i; \ 212697a45d6SDavid Hildenbrand \ 213697a45d6SDavid Hildenbrand for (i = 0; i < (128 / TBITS); i++) { \ 214697a45d6SDavid Hildenbrand uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 215697a45d6SDavid Hildenbrand uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 216697a45d6SDavid Hildenbrand uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 217697a45d6SDavid Hildenbrand \ 218697a45d6SDavid Hildenbrand a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 219697a45d6SDavid Hildenbrand b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 220697a45d6SDavid Hildenbrand d = d ^ galois_multiply32(a, b); \ 221697a45d6SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, d); \ 222697a45d6SDavid Hildenbrand } \ 223697a45d6SDavid Hildenbrand } 224697a45d6SDavid Hildenbrand DEF_VGFM(8, 16) 225697a45d6SDavid Hildenbrand DEF_VGFM(16, 32) 226697a45d6SDavid Hildenbrand DEF_VGFM(32, 64) 227697a45d6SDavid Hildenbrand 228697a45d6SDavid Hildenbrand void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3, 229697a45d6SDavid Hildenbrand uint32_t desc) 230697a45d6SDavid Hildenbrand { 231697a45d6SDavid Hildenbrand S390Vector tmp1, tmp2; 232697a45d6SDavid Hildenbrand uint64_t a, b; 233697a45d6SDavid Hildenbrand 234697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 0); 235697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 0); 236697a45d6SDavid Hildenbrand tmp1 = galois_multiply64(a, b); 237697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 1); 238697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 1); 239697a45d6SDavid Hildenbrand tmp2 = galois_multiply64(a, b); 240697a45d6SDavid Hildenbrand s390_vec_xor(v1, &tmp1, &tmp2); 241697a45d6SDavid Hildenbrand } 242697a45d6SDavid Hildenbrand 243697a45d6SDavid Hildenbrand #define DEF_VGFMA(BITS, TBITS) \ 244697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \ 245697a45d6SDavid Hildenbrand const void *v4, uint32_t desc) \ 246697a45d6SDavid Hildenbrand { \ 247697a45d6SDavid Hildenbrand int i; \ 248697a45d6SDavid Hildenbrand \ 249697a45d6SDavid Hildenbrand for (i = 0; i < (128 / TBITS); i++) { \ 250697a45d6SDavid Hildenbrand uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 251697a45d6SDavid Hildenbrand uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 252697a45d6SDavid Hildenbrand uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 253697a45d6SDavid Hildenbrand \ 254697a45d6SDavid Hildenbrand a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 255697a45d6SDavid Hildenbrand b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 256697a45d6SDavid Hildenbrand d = d ^ galois_multiply32(a, b); \ 257697a45d6SDavid Hildenbrand d = d ^ s390_vec_read_element##TBITS(v4, i); \ 258697a45d6SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, d); \ 259697a45d6SDavid Hildenbrand } \ 260697a45d6SDavid Hildenbrand } 261697a45d6SDavid Hildenbrand DEF_VGFMA(8, 16) 262697a45d6SDavid Hildenbrand DEF_VGFMA(16, 32) 263697a45d6SDavid Hildenbrand DEF_VGFMA(32, 64) 264697a45d6SDavid Hildenbrand 265697a45d6SDavid Hildenbrand void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3, 266697a45d6SDavid Hildenbrand const void *v4, uint32_t desc) 267697a45d6SDavid Hildenbrand { 268697a45d6SDavid Hildenbrand S390Vector tmp1, tmp2; 269697a45d6SDavid Hildenbrand uint64_t a, b; 270697a45d6SDavid Hildenbrand 271697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 0); 272697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 0); 273697a45d6SDavid Hildenbrand tmp1 = galois_multiply64(a, b); 274697a45d6SDavid Hildenbrand a = s390_vec_read_element64(v2, 1); 275697a45d6SDavid Hildenbrand b = s390_vec_read_element64(v3, 1); 276697a45d6SDavid Hildenbrand tmp2 = galois_multiply64(a, b); 277697a45d6SDavid Hildenbrand s390_vec_xor(&tmp1, &tmp1, &tmp2); 278697a45d6SDavid Hildenbrand s390_vec_xor(v1, &tmp1, v4); 279697a45d6SDavid Hildenbrand } 2801b430aecSDavid Hildenbrand 2811b430aecSDavid Hildenbrand #define DEF_VMAL(BITS) \ 2821b430aecSDavid Hildenbrand void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \ 2831b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 2841b430aecSDavid Hildenbrand { \ 2851b430aecSDavid Hildenbrand int i; \ 2861b430aecSDavid Hildenbrand \ 2871b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 2881b430aecSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 2891b430aecSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 2901b430aecSDavid Hildenbrand const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 2911b430aecSDavid Hildenbrand \ 2921b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a * b + c); \ 2931b430aecSDavid Hildenbrand } \ 2941b430aecSDavid Hildenbrand } 2951b430aecSDavid Hildenbrand DEF_VMAL(8) 2961b430aecSDavid Hildenbrand DEF_VMAL(16) 2971b430aecSDavid Hildenbrand 2981b430aecSDavid Hildenbrand #define DEF_VMAH(BITS) \ 2991b430aecSDavid Hildenbrand void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \ 3001b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3011b430aecSDavid Hildenbrand { \ 3021b430aecSDavid Hildenbrand int i; \ 3031b430aecSDavid Hildenbrand \ 3041b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 3051b430aecSDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 3061b430aecSDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 3071b430aecSDavid Hildenbrand const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \ 3081b430aecSDavid Hildenbrand \ 3091b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 3101b430aecSDavid Hildenbrand } \ 3111b430aecSDavid Hildenbrand } 3121b430aecSDavid Hildenbrand DEF_VMAH(8) 3131b430aecSDavid Hildenbrand DEF_VMAH(16) 3141b430aecSDavid Hildenbrand 3151b430aecSDavid Hildenbrand #define DEF_VMALH(BITS) \ 3161b430aecSDavid Hildenbrand void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \ 3171b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3181b430aecSDavid Hildenbrand { \ 3191b430aecSDavid Hildenbrand int i; \ 3201b430aecSDavid Hildenbrand \ 3211b430aecSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 3221b430aecSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 3231b430aecSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 3241b430aecSDavid Hildenbrand const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 3251b430aecSDavid Hildenbrand \ 3261b430aecSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 3271b430aecSDavid Hildenbrand } \ 3281b430aecSDavid Hildenbrand } 3291b430aecSDavid Hildenbrand DEF_VMALH(8) 3301b430aecSDavid Hildenbrand DEF_VMALH(16) 3311b430aecSDavid Hildenbrand 3321b430aecSDavid Hildenbrand #define DEF_VMAE(BITS, TBITS) \ 3331b430aecSDavid Hildenbrand void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \ 3341b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3351b430aecSDavid Hildenbrand { \ 3361b430aecSDavid Hildenbrand int i, j; \ 3371b430aecSDavid Hildenbrand \ 3381b430aecSDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 3391b430aecSDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 3401b430aecSDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 3418b952519SDavid Hildenbrand int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 3421b430aecSDavid Hildenbrand \ 3431b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 3441b430aecSDavid Hildenbrand } \ 3451b430aecSDavid Hildenbrand } 3461b430aecSDavid Hildenbrand DEF_VMAE(8, 16) 3471b430aecSDavid Hildenbrand DEF_VMAE(16, 32) 3481b430aecSDavid Hildenbrand DEF_VMAE(32, 64) 3491b430aecSDavid Hildenbrand 3501b430aecSDavid Hildenbrand #define DEF_VMALE(BITS, TBITS) \ 3511b430aecSDavid Hildenbrand void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \ 3521b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3531b430aecSDavid Hildenbrand { \ 3541b430aecSDavid Hildenbrand int i, j; \ 3551b430aecSDavid Hildenbrand \ 3561b430aecSDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 3571b430aecSDavid Hildenbrand uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 3581b430aecSDavid Hildenbrand uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 3598b952519SDavid Hildenbrand uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 3601b430aecSDavid Hildenbrand \ 3611b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 3621b430aecSDavid Hildenbrand } \ 3631b430aecSDavid Hildenbrand } 3641b430aecSDavid Hildenbrand DEF_VMALE(8, 16) 3651b430aecSDavid Hildenbrand DEF_VMALE(16, 32) 3661b430aecSDavid Hildenbrand DEF_VMALE(32, 64) 3671b430aecSDavid Hildenbrand 3681b430aecSDavid Hildenbrand #define DEF_VMAO(BITS, TBITS) \ 3691b430aecSDavid Hildenbrand void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \ 3701b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3711b430aecSDavid Hildenbrand { \ 3721b430aecSDavid Hildenbrand int i, j; \ 3731b430aecSDavid Hildenbrand \ 3741b430aecSDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 3751b430aecSDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 3761b430aecSDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 3778b952519SDavid Hildenbrand int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 3781b430aecSDavid Hildenbrand \ 3791b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 3801b430aecSDavid Hildenbrand } \ 3811b430aecSDavid Hildenbrand } 3821b430aecSDavid Hildenbrand DEF_VMAO(8, 16) 3831b430aecSDavid Hildenbrand DEF_VMAO(16, 32) 3841b430aecSDavid Hildenbrand DEF_VMAO(32, 64) 3851b430aecSDavid Hildenbrand 3861b430aecSDavid Hildenbrand #define DEF_VMALO(BITS, TBITS) \ 3871b430aecSDavid Hildenbrand void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \ 3881b430aecSDavid Hildenbrand const void *v4, uint32_t desc) \ 3891b430aecSDavid Hildenbrand { \ 3901b430aecSDavid Hildenbrand int i, j; \ 3911b430aecSDavid Hildenbrand \ 3921b430aecSDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 3931b430aecSDavid Hildenbrand uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 3941b430aecSDavid Hildenbrand uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 3958b952519SDavid Hildenbrand uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 3961b430aecSDavid Hildenbrand \ 3971b430aecSDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b + c); \ 3981b430aecSDavid Hildenbrand } \ 3991b430aecSDavid Hildenbrand } 4001b430aecSDavid Hildenbrand DEF_VMALO(8, 16) 4011b430aecSDavid Hildenbrand DEF_VMALO(16, 32) 4021b430aecSDavid Hildenbrand DEF_VMALO(32, 64) 4032bf3ee38SDavid Hildenbrand 4042bf3ee38SDavid Hildenbrand #define DEF_VMH(BITS) \ 4052bf3ee38SDavid Hildenbrand void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \ 4062bf3ee38SDavid Hildenbrand uint32_t desc) \ 4072bf3ee38SDavid Hildenbrand { \ 4082bf3ee38SDavid Hildenbrand int i; \ 4092bf3ee38SDavid Hildenbrand \ 4102bf3ee38SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 4112bf3ee38SDavid Hildenbrand const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 4122bf3ee38SDavid Hildenbrand const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 4132bf3ee38SDavid Hildenbrand \ 4142bf3ee38SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 4152bf3ee38SDavid Hildenbrand } \ 4162bf3ee38SDavid Hildenbrand } 4172bf3ee38SDavid Hildenbrand DEF_VMH(8) 4182bf3ee38SDavid Hildenbrand DEF_VMH(16) 4192bf3ee38SDavid Hildenbrand 4202bf3ee38SDavid Hildenbrand #define DEF_VMLH(BITS) \ 4212bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \ 4222bf3ee38SDavid Hildenbrand uint32_t desc) \ 4232bf3ee38SDavid Hildenbrand { \ 4242bf3ee38SDavid Hildenbrand int i; \ 4252bf3ee38SDavid Hildenbrand \ 4262bf3ee38SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 4272bf3ee38SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 4282bf3ee38SDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 4292bf3ee38SDavid Hildenbrand \ 4302bf3ee38SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 4312bf3ee38SDavid Hildenbrand } \ 4322bf3ee38SDavid Hildenbrand } 4332bf3ee38SDavid Hildenbrand DEF_VMLH(8) 4342bf3ee38SDavid Hildenbrand DEF_VMLH(16) 4352bf3ee38SDavid Hildenbrand 4362bf3ee38SDavid Hildenbrand #define DEF_VME(BITS, TBITS) \ 4372bf3ee38SDavid Hildenbrand void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \ 4382bf3ee38SDavid Hildenbrand uint32_t desc) \ 4392bf3ee38SDavid Hildenbrand { \ 4402bf3ee38SDavid Hildenbrand int i, j; \ 4412bf3ee38SDavid Hildenbrand \ 4422bf3ee38SDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 4432bf3ee38SDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 4442bf3ee38SDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 4452bf3ee38SDavid Hildenbrand \ 4462bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 4472bf3ee38SDavid Hildenbrand } \ 4482bf3ee38SDavid Hildenbrand } 4492bf3ee38SDavid Hildenbrand DEF_VME(8, 16) 4502bf3ee38SDavid Hildenbrand DEF_VME(16, 32) 4512bf3ee38SDavid Hildenbrand DEF_VME(32, 64) 4522bf3ee38SDavid Hildenbrand 4532bf3ee38SDavid Hildenbrand #define DEF_VMLE(BITS, TBITS) \ 4542bf3ee38SDavid Hildenbrand void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \ 4552bf3ee38SDavid Hildenbrand uint32_t desc) \ 4562bf3ee38SDavid Hildenbrand { \ 4572bf3ee38SDavid Hildenbrand int i, j; \ 4582bf3ee38SDavid Hildenbrand \ 4592bf3ee38SDavid Hildenbrand for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 4602bf3ee38SDavid Hildenbrand const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 4612bf3ee38SDavid Hildenbrand const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 4622bf3ee38SDavid Hildenbrand \ 4632bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 4642bf3ee38SDavid Hildenbrand } \ 4652bf3ee38SDavid Hildenbrand } 4662bf3ee38SDavid Hildenbrand DEF_VMLE(8, 16) 4672bf3ee38SDavid Hildenbrand DEF_VMLE(16, 32) 4682bf3ee38SDavid Hildenbrand DEF_VMLE(32, 64) 4692bf3ee38SDavid Hildenbrand 4702bf3ee38SDavid Hildenbrand #define DEF_VMO(BITS, TBITS) \ 4712bf3ee38SDavid Hildenbrand void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \ 4722bf3ee38SDavid Hildenbrand uint32_t desc) \ 4732bf3ee38SDavid Hildenbrand { \ 4742bf3ee38SDavid Hildenbrand int i, j; \ 4752bf3ee38SDavid Hildenbrand \ 4762bf3ee38SDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 4772bf3ee38SDavid Hildenbrand int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 4782bf3ee38SDavid Hildenbrand int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 4792bf3ee38SDavid Hildenbrand \ 4802bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 4812bf3ee38SDavid Hildenbrand } \ 4822bf3ee38SDavid Hildenbrand } 4832bf3ee38SDavid Hildenbrand DEF_VMO(8, 16) 4842bf3ee38SDavid Hildenbrand DEF_VMO(16, 32) 4852bf3ee38SDavid Hildenbrand DEF_VMO(32, 64) 4862bf3ee38SDavid Hildenbrand 4872bf3ee38SDavid Hildenbrand #define DEF_VMLO(BITS, TBITS) \ 4882bf3ee38SDavid Hildenbrand void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \ 4892bf3ee38SDavid Hildenbrand uint32_t desc) \ 4902bf3ee38SDavid Hildenbrand { \ 4912bf3ee38SDavid Hildenbrand int i, j; \ 4922bf3ee38SDavid Hildenbrand \ 49349a7ce4eSDavid Hildenbrand for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 4942bf3ee38SDavid Hildenbrand const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 4952bf3ee38SDavid Hildenbrand const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 4962bf3ee38SDavid Hildenbrand \ 4972bf3ee38SDavid Hildenbrand s390_vec_write_element##TBITS(v1, i, a * b); \ 4982bf3ee38SDavid Hildenbrand } \ 4992bf3ee38SDavid Hildenbrand } 5002bf3ee38SDavid Hildenbrand DEF_VMLO(8, 16) 5012bf3ee38SDavid Hildenbrand DEF_VMLO(16, 32) 5022bf3ee38SDavid Hildenbrand DEF_VMLO(32, 64) 503c3838aaaSDavid Hildenbrand 504c3838aaaSDavid Hildenbrand #define DEF_VPOPCT(BITS) \ 505c3838aaaSDavid Hildenbrand void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \ 506c3838aaaSDavid Hildenbrand { \ 507c3838aaaSDavid Hildenbrand int i; \ 508c3838aaaSDavid Hildenbrand \ 509c3838aaaSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 510c3838aaaSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 511c3838aaaSDavid Hildenbrand \ 512c3838aaaSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, ctpop32(a)); \ 513c3838aaaSDavid Hildenbrand } \ 514c3838aaaSDavid Hildenbrand } 515c3838aaaSDavid Hildenbrand DEF_VPOPCT(8) 516c3838aaaSDavid Hildenbrand DEF_VPOPCT(16) 51755236da2SDavid Hildenbrand 51855236da2SDavid Hildenbrand #define DEF_VERLLV(BITS) \ 51955236da2SDavid Hildenbrand void HELPER(gvec_verllv##BITS)(void *v1, const void *v2, const void *v3, \ 52055236da2SDavid Hildenbrand uint32_t desc) \ 52155236da2SDavid Hildenbrand { \ 52255236da2SDavid Hildenbrand int i; \ 52355236da2SDavid Hildenbrand \ 52455236da2SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 52555236da2SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 52655236da2SDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 52755236da2SDavid Hildenbrand \ 52855236da2SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, rol##BITS(a, b)); \ 52955236da2SDavid Hildenbrand } \ 53055236da2SDavid Hildenbrand } 53155236da2SDavid Hildenbrand DEF_VERLLV(8) 53255236da2SDavid Hildenbrand DEF_VERLLV(16) 53355236da2SDavid Hildenbrand 53455236da2SDavid Hildenbrand #define DEF_VERLL(BITS) \ 53555236da2SDavid Hildenbrand void HELPER(gvec_verll##BITS)(void *v1, const void *v2, uint64_t count, \ 53655236da2SDavid Hildenbrand uint32_t desc) \ 53755236da2SDavid Hildenbrand { \ 53855236da2SDavid Hildenbrand int i; \ 53955236da2SDavid Hildenbrand \ 54055236da2SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 54155236da2SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 54255236da2SDavid Hildenbrand \ 54355236da2SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, rol##BITS(a, count)); \ 54455236da2SDavid Hildenbrand } \ 54555236da2SDavid Hildenbrand } 54655236da2SDavid Hildenbrand DEF_VERLL(8) 54755236da2SDavid Hildenbrand DEF_VERLL(16) 5485c4b0ab4SDavid Hildenbrand 5495c4b0ab4SDavid Hildenbrand #define DEF_VERIM(BITS) \ 5505c4b0ab4SDavid Hildenbrand void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \ 5515c4b0ab4SDavid Hildenbrand uint32_t desc) \ 5525c4b0ab4SDavid Hildenbrand { \ 5535c4b0ab4SDavid Hildenbrand const uint8_t count = simd_data(desc); \ 5545c4b0ab4SDavid Hildenbrand int i; \ 5555c4b0ab4SDavid Hildenbrand \ 5565c4b0ab4SDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 5575c4b0ab4SDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \ 5585c4b0ab4SDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i); \ 5595c4b0ab4SDavid Hildenbrand const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \ 5605c4b0ab4SDavid Hildenbrand const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \ 5615c4b0ab4SDavid Hildenbrand \ 5625c4b0ab4SDavid Hildenbrand s390_vec_write_element##BITS(v1, i, d); \ 5635c4b0ab4SDavid Hildenbrand } \ 5645c4b0ab4SDavid Hildenbrand } 5655c4b0ab4SDavid Hildenbrand DEF_VERIM(8) 5665c4b0ab4SDavid Hildenbrand DEF_VERIM(16) 567dea33fc3SDavid Hildenbrand 568dea33fc3SDavid Hildenbrand void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count, 569dea33fc3SDavid Hildenbrand uint32_t desc) 570dea33fc3SDavid Hildenbrand { 571dea33fc3SDavid Hildenbrand s390_vec_shl(v1, v2, count); 572dea33fc3SDavid Hildenbrand } 5735f724887SDavid Hildenbrand 5745f724887SDavid Hildenbrand void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count, 5755f724887SDavid Hildenbrand uint32_t desc) 5765f724887SDavid Hildenbrand { 5775f724887SDavid Hildenbrand s390_vec_sar(v1, v2, count); 5785f724887SDavid Hildenbrand } 5798112274fSDavid Hildenbrand 5808112274fSDavid Hildenbrand void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count, 5818112274fSDavid Hildenbrand uint32_t desc) 5828112274fSDavid Hildenbrand { 5838112274fSDavid Hildenbrand s390_vec_shr(v1, v2, count); 5848112274fSDavid Hildenbrand } 5851ee2d7baSDavid Hildenbrand 5861ee2d7baSDavid Hildenbrand #define DEF_VSCBI(BITS) \ 5871ee2d7baSDavid Hildenbrand void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \ 5881ee2d7baSDavid Hildenbrand uint32_t desc) \ 5891ee2d7baSDavid Hildenbrand { \ 5901ee2d7baSDavid Hildenbrand int i; \ 5911ee2d7baSDavid Hildenbrand \ 5921ee2d7baSDavid Hildenbrand for (i = 0; i < (128 / BITS); i++) { \ 5931ee2d7baSDavid Hildenbrand const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 5941ee2d7baSDavid Hildenbrand const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 5951ee2d7baSDavid Hildenbrand \ 5961ee2d7baSDavid Hildenbrand s390_vec_write_element##BITS(v1, i, a < b); \ 5971ee2d7baSDavid Hildenbrand } \ 5981ee2d7baSDavid Hildenbrand } 5991ee2d7baSDavid Hildenbrand DEF_VSCBI(8) 6001ee2d7baSDavid Hildenbrand DEF_VSCBI(16) 601db156ebfSDavid Hildenbrand 602db156ebfSDavid Hildenbrand void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env, 603db156ebfSDavid Hildenbrand uint32_t desc) 604db156ebfSDavid Hildenbrand { 605db156ebfSDavid Hildenbrand S390Vector tmp; 606db156ebfSDavid Hildenbrand 607db156ebfSDavid Hildenbrand s390_vec_and(&tmp, v1, v2); 608db156ebfSDavid Hildenbrand if (s390_vec_is_zero(&tmp)) { 609db156ebfSDavid Hildenbrand /* Selected bits all zeros; or all mask bits zero */ 610db156ebfSDavid Hildenbrand env->cc_op = 0; 611db156ebfSDavid Hildenbrand } else if (s390_vec_equal(&tmp, v2)) { 612db156ebfSDavid Hildenbrand /* Selected bits all ones */ 613db156ebfSDavid Hildenbrand env->cc_op = 3; 614db156ebfSDavid Hildenbrand } else { 615db156ebfSDavid Hildenbrand /* Selected bits a mix of zeros and ones */ 616db156ebfSDavid Hildenbrand env->cc_op = 1; 617db156ebfSDavid Hildenbrand } 618db156ebfSDavid Hildenbrand } 619