11fd28638SDavid Hildenbrand /* 21fd28638SDavid Hildenbrand * QEMU TCG support -- s390x vector string instruction support 31fd28638SDavid Hildenbrand * 41fd28638SDavid Hildenbrand * Copyright (C) 2019 Red Hat Inc 51fd28638SDavid Hildenbrand * 61fd28638SDavid Hildenbrand * Authors: 71fd28638SDavid Hildenbrand * David Hildenbrand <david@redhat.com> 81fd28638SDavid Hildenbrand * 91fd28638SDavid Hildenbrand * This work is licensed under the terms of the GNU GPL, version 2 or later. 101fd28638SDavid Hildenbrand * See the COPYING file in the top-level directory. 111fd28638SDavid Hildenbrand */ 121fd28638SDavid Hildenbrand #include "qemu/osdep.h" 131fd28638SDavid Hildenbrand #include "qemu-common.h" 141fd28638SDavid Hildenbrand #include "cpu.h" 151fd28638SDavid Hildenbrand #include "internal.h" 161fd28638SDavid Hildenbrand #include "vec.h" 171fd28638SDavid Hildenbrand #include "tcg/tcg.h" 181fd28638SDavid Hildenbrand #include "tcg/tcg-gvec-desc.h" 191fd28638SDavid Hildenbrand #include "exec/helper-proto.h" 201fd28638SDavid Hildenbrand 211fd28638SDavid Hildenbrand /* 221fd28638SDavid Hildenbrand * Returns a bit set in the MSB of each element that is zero, 231fd28638SDavid Hildenbrand * as defined by the mask. 241fd28638SDavid Hildenbrand */ 251fd28638SDavid Hildenbrand static inline uint64_t zero_search(uint64_t a, uint64_t mask) 261fd28638SDavid Hildenbrand { 271fd28638SDavid Hildenbrand return ~(((a & mask) + mask) | a | mask); 281fd28638SDavid Hildenbrand } 291fd28638SDavid Hildenbrand 301fd28638SDavid Hildenbrand /* 311fd28638SDavid Hildenbrand * Returns the byte offset for the first match, or 16 for no match. 321fd28638SDavid Hildenbrand */ 331fd28638SDavid Hildenbrand static inline int match_index(uint64_t c0, uint64_t c1) 341fd28638SDavid Hildenbrand { 351fd28638SDavid Hildenbrand return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3; 361fd28638SDavid Hildenbrand } 371fd28638SDavid Hildenbrand 381fd28638SDavid Hildenbrand /* 391fd28638SDavid Hildenbrand * Returns the number of bits composing one element. 401fd28638SDavid Hildenbrand */ 411fd28638SDavid Hildenbrand static uint8_t get_element_bits(uint8_t es) 421fd28638SDavid Hildenbrand { 431fd28638SDavid Hildenbrand return (1 << es) * BITS_PER_BYTE; 441fd28638SDavid Hildenbrand } 451fd28638SDavid Hildenbrand 461fd28638SDavid Hildenbrand /* 471fd28638SDavid Hildenbrand * Returns the bitmask for a single element. 481fd28638SDavid Hildenbrand */ 491fd28638SDavid Hildenbrand static uint64_t get_single_element_mask(uint8_t es) 501fd28638SDavid Hildenbrand { 511fd28638SDavid Hildenbrand return -1ull >> (64 - get_element_bits(es)); 521fd28638SDavid Hildenbrand } 531fd28638SDavid Hildenbrand 541fd28638SDavid Hildenbrand /* 551fd28638SDavid Hildenbrand * Returns the bitmask for a single element (excluding the MSB). 561fd28638SDavid Hildenbrand */ 571fd28638SDavid Hildenbrand static uint64_t get_single_element_lsbs_mask(uint8_t es) 581fd28638SDavid Hildenbrand { 591fd28638SDavid Hildenbrand return -1ull >> (65 - get_element_bits(es)); 601fd28638SDavid Hildenbrand } 611fd28638SDavid Hildenbrand 621fd28638SDavid Hildenbrand /* 631fd28638SDavid Hildenbrand * Returns the bitmasks for multiple elements (excluding the MSBs). 641fd28638SDavid Hildenbrand */ 651fd28638SDavid Hildenbrand static uint64_t get_element_lsbs_mask(uint8_t es) 661fd28638SDavid Hildenbrand { 671fd28638SDavid Hildenbrand return dup_const(es, get_single_element_lsbs_mask(es)); 681fd28638SDavid Hildenbrand } 691fd28638SDavid Hildenbrand 701fd28638SDavid Hildenbrand static int vfae(void *v1, const void *v2, const void *v3, bool in, 711fd28638SDavid Hildenbrand bool rt, bool zs, uint8_t es) 721fd28638SDavid Hildenbrand { 731fd28638SDavid Hildenbrand const uint64_t mask = get_element_lsbs_mask(es); 741fd28638SDavid Hildenbrand const int bits = get_element_bits(es); 751fd28638SDavid Hildenbrand uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1; 761fd28638SDavid Hildenbrand uint64_t first_zero = 16; 771fd28638SDavid Hildenbrand uint64_t first_equal; 781fd28638SDavid Hildenbrand int i; 791fd28638SDavid Hildenbrand 801fd28638SDavid Hildenbrand a0 = s390_vec_read_element64(v2, 0); 811fd28638SDavid Hildenbrand a1 = s390_vec_read_element64(v2, 1); 821fd28638SDavid Hildenbrand b0 = s390_vec_read_element64(v3, 0); 831fd28638SDavid Hildenbrand b1 = s390_vec_read_element64(v3, 1); 841fd28638SDavid Hildenbrand e0 = 0; 851fd28638SDavid Hildenbrand e1 = 0; 861fd28638SDavid Hildenbrand /* compare against equality with every other element */ 871fd28638SDavid Hildenbrand for (i = 0; i < 64; i += bits) { 881fd28638SDavid Hildenbrand t0 = rol64(b0, i); 891fd28638SDavid Hildenbrand t1 = rol64(b1, i); 901fd28638SDavid Hildenbrand e0 |= zero_search(a0 ^ t0, mask); 911fd28638SDavid Hildenbrand e0 |= zero_search(a0 ^ t1, mask); 921fd28638SDavid Hildenbrand e1 |= zero_search(a1 ^ t0, mask); 931fd28638SDavid Hildenbrand e1 |= zero_search(a1 ^ t1, mask); 941fd28638SDavid Hildenbrand } 951fd28638SDavid Hildenbrand /* invert the result if requested - invert only the MSBs */ 961fd28638SDavid Hildenbrand if (in) { 971fd28638SDavid Hildenbrand e0 = ~e0 & ~mask; 981fd28638SDavid Hildenbrand e1 = ~e1 & ~mask; 991fd28638SDavid Hildenbrand } 1001fd28638SDavid Hildenbrand first_equal = match_index(e0, e1); 1011fd28638SDavid Hildenbrand 1021fd28638SDavid Hildenbrand if (zs) { 1031fd28638SDavid Hildenbrand z0 = zero_search(a0, mask); 1041fd28638SDavid Hildenbrand z1 = zero_search(a1, mask); 1051fd28638SDavid Hildenbrand first_zero = match_index(z0, z1); 1061fd28638SDavid Hildenbrand } 1071fd28638SDavid Hildenbrand 1081fd28638SDavid Hildenbrand if (rt) { 1091fd28638SDavid Hildenbrand e0 = (e0 >> (bits - 1)) * get_single_element_mask(es); 1101fd28638SDavid Hildenbrand e1 = (e1 >> (bits - 1)) * get_single_element_mask(es); 1111fd28638SDavid Hildenbrand s390_vec_write_element64(v1, 0, e0); 1121fd28638SDavid Hildenbrand s390_vec_write_element64(v1, 1, e1); 1131fd28638SDavid Hildenbrand } else { 1141fd28638SDavid Hildenbrand s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); 1151fd28638SDavid Hildenbrand s390_vec_write_element64(v1, 1, 0); 1161fd28638SDavid Hildenbrand } 1171fd28638SDavid Hildenbrand 1181fd28638SDavid Hildenbrand if (first_zero == 16 && first_equal == 16) { 1191fd28638SDavid Hildenbrand return 3; /* no match */ 1201fd28638SDavid Hildenbrand } else if (first_zero == 16) { 1211fd28638SDavid Hildenbrand return 1; /* matching elements, no match for zero */ 1221fd28638SDavid Hildenbrand } else if (first_equal < first_zero) { 1231fd28638SDavid Hildenbrand return 2; /* matching elements before match for zero */ 1241fd28638SDavid Hildenbrand } 1251fd28638SDavid Hildenbrand return 0; /* match for zero */ 1261fd28638SDavid Hildenbrand } 1271fd28638SDavid Hildenbrand 1281fd28638SDavid Hildenbrand #define DEF_VFAE_HELPER(BITS) \ 1291fd28638SDavid Hildenbrand void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3, \ 1301fd28638SDavid Hildenbrand uint32_t desc) \ 1311fd28638SDavid Hildenbrand { \ 1321fd28638SDavid Hildenbrand const bool in = extract32(simd_data(desc), 3, 1); \ 1331fd28638SDavid Hildenbrand const bool rt = extract32(simd_data(desc), 2, 1); \ 1341fd28638SDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \ 1351fd28638SDavid Hildenbrand \ 1361fd28638SDavid Hildenbrand vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ 1371fd28638SDavid Hildenbrand } 1381fd28638SDavid Hildenbrand DEF_VFAE_HELPER(8) 1391fd28638SDavid Hildenbrand DEF_VFAE_HELPER(16) 1401fd28638SDavid Hildenbrand DEF_VFAE_HELPER(32) 1411fd28638SDavid Hildenbrand 1421fd28638SDavid Hildenbrand #define DEF_VFAE_CC_HELPER(BITS) \ 1431fd28638SDavid Hildenbrand void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3, \ 1441fd28638SDavid Hildenbrand CPUS390XState *env, uint32_t desc) \ 1451fd28638SDavid Hildenbrand { \ 1461fd28638SDavid Hildenbrand const bool in = extract32(simd_data(desc), 3, 1); \ 1471fd28638SDavid Hildenbrand const bool rt = extract32(simd_data(desc), 2, 1); \ 1481fd28638SDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \ 1491fd28638SDavid Hildenbrand \ 1501fd28638SDavid Hildenbrand env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ 1511fd28638SDavid Hildenbrand } 1521fd28638SDavid Hildenbrand DEF_VFAE_CC_HELPER(8) 1531fd28638SDavid Hildenbrand DEF_VFAE_CC_HELPER(16) 1541fd28638SDavid Hildenbrand DEF_VFAE_CC_HELPER(32) 155*8c0e1e58SDavid Hildenbrand 156*8c0e1e58SDavid Hildenbrand static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) 157*8c0e1e58SDavid Hildenbrand { 158*8c0e1e58SDavid Hildenbrand const uint64_t mask = get_element_lsbs_mask(es); 159*8c0e1e58SDavid Hildenbrand uint64_t a0, a1, b0, b1, e0, e1, z0, z1; 160*8c0e1e58SDavid Hildenbrand uint64_t first_zero = 16; 161*8c0e1e58SDavid Hildenbrand uint64_t first_equal; 162*8c0e1e58SDavid Hildenbrand 163*8c0e1e58SDavid Hildenbrand a0 = s390_vec_read_element64(v2, 0); 164*8c0e1e58SDavid Hildenbrand a1 = s390_vec_read_element64(v2, 1); 165*8c0e1e58SDavid Hildenbrand b0 = s390_vec_read_element64(v3, 0); 166*8c0e1e58SDavid Hildenbrand b1 = s390_vec_read_element64(v3, 1); 167*8c0e1e58SDavid Hildenbrand e0 = zero_search(a0 ^ b0, mask); 168*8c0e1e58SDavid Hildenbrand e1 = zero_search(a1 ^ b1, mask); 169*8c0e1e58SDavid Hildenbrand first_equal = match_index(e0, e1); 170*8c0e1e58SDavid Hildenbrand 171*8c0e1e58SDavid Hildenbrand if (zs) { 172*8c0e1e58SDavid Hildenbrand z0 = zero_search(a0, mask); 173*8c0e1e58SDavid Hildenbrand z1 = zero_search(a1, mask); 174*8c0e1e58SDavid Hildenbrand first_zero = match_index(z0, z1); 175*8c0e1e58SDavid Hildenbrand } 176*8c0e1e58SDavid Hildenbrand 177*8c0e1e58SDavid Hildenbrand s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); 178*8c0e1e58SDavid Hildenbrand s390_vec_write_element64(v1, 1, 0); 179*8c0e1e58SDavid Hildenbrand if (first_zero == 16 && first_equal == 16) { 180*8c0e1e58SDavid Hildenbrand return 3; /* no match */ 181*8c0e1e58SDavid Hildenbrand } else if (first_zero == 16) { 182*8c0e1e58SDavid Hildenbrand return 1; /* matching elements, no match for zero */ 183*8c0e1e58SDavid Hildenbrand } else if (first_equal < first_zero) { 184*8c0e1e58SDavid Hildenbrand return 2; /* matching elements before match for zero */ 185*8c0e1e58SDavid Hildenbrand } 186*8c0e1e58SDavid Hildenbrand return 0; /* match for zero */ 187*8c0e1e58SDavid Hildenbrand } 188*8c0e1e58SDavid Hildenbrand 189*8c0e1e58SDavid Hildenbrand #define DEF_VFEE_HELPER(BITS) \ 190*8c0e1e58SDavid Hildenbrand void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3, \ 191*8c0e1e58SDavid Hildenbrand uint32_t desc) \ 192*8c0e1e58SDavid Hildenbrand { \ 193*8c0e1e58SDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \ 194*8c0e1e58SDavid Hildenbrand \ 195*8c0e1e58SDavid Hildenbrand vfee(v1, v2, v3, zs, MO_##BITS); \ 196*8c0e1e58SDavid Hildenbrand } 197*8c0e1e58SDavid Hildenbrand DEF_VFEE_HELPER(8) 198*8c0e1e58SDavid Hildenbrand DEF_VFEE_HELPER(16) 199*8c0e1e58SDavid Hildenbrand DEF_VFEE_HELPER(32) 200*8c0e1e58SDavid Hildenbrand 201*8c0e1e58SDavid Hildenbrand #define DEF_VFEE_CC_HELPER(BITS) \ 202*8c0e1e58SDavid Hildenbrand void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \ 203*8c0e1e58SDavid Hildenbrand CPUS390XState *env, uint32_t desc) \ 204*8c0e1e58SDavid Hildenbrand { \ 205*8c0e1e58SDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \ 206*8c0e1e58SDavid Hildenbrand \ 207*8c0e1e58SDavid Hildenbrand env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS); \ 208*8c0e1e58SDavid Hildenbrand } 209*8c0e1e58SDavid Hildenbrand DEF_VFEE_CC_HELPER(8) 210*8c0e1e58SDavid Hildenbrand DEF_VFEE_CC_HELPER(16) 211*8c0e1e58SDavid Hildenbrand DEF_VFEE_CC_HELPER(32) 212