11fd28638SDavid Hildenbrand /* 21fd28638SDavid Hildenbrand * QEMU TCG support -- s390x vector string instruction support 31fd28638SDavid Hildenbrand * 41fd28638SDavid Hildenbrand * Copyright (C) 2019 Red Hat Inc 51fd28638SDavid Hildenbrand * 61fd28638SDavid Hildenbrand * Authors: 71fd28638SDavid Hildenbrand * David Hildenbrand <david@redhat.com> 81fd28638SDavid Hildenbrand * 91fd28638SDavid Hildenbrand * This work is licensed under the terms of the GNU GPL, version 2 or later. 101fd28638SDavid Hildenbrand * See the COPYING file in the top-level directory. 111fd28638SDavid Hildenbrand */ 121fd28638SDavid Hildenbrand #include "qemu/osdep.h" 131fd28638SDavid Hildenbrand #include "qemu-common.h" 141fd28638SDavid Hildenbrand #include "cpu.h" 15*b6b47223SCho, Yu-Chen #include "s390x-internal.h" 161fd28638SDavid Hildenbrand #include "vec.h" 171fd28638SDavid Hildenbrand #include "tcg/tcg.h" 181fd28638SDavid Hildenbrand #include "tcg/tcg-gvec-desc.h" 191fd28638SDavid Hildenbrand #include "exec/helper-proto.h" 201fd28638SDavid Hildenbrand 211fd28638SDavid Hildenbrand /* 221fd28638SDavid Hildenbrand * Returns a bit set in the MSB of each element that is zero, 231fd28638SDavid Hildenbrand * as defined by the mask. 241fd28638SDavid Hildenbrand */ 251fd28638SDavid Hildenbrand static inline uint64_t zero_search(uint64_t a, uint64_t mask) 261fd28638SDavid Hildenbrand { 271fd28638SDavid Hildenbrand return ~(((a & mask) + mask) | a | mask); 281fd28638SDavid Hildenbrand } 291fd28638SDavid Hildenbrand 301fd28638SDavid Hildenbrand /* 31074e99b3SDavid Hildenbrand * Returns a bit set in the MSB of each element that is not zero, 32074e99b3SDavid Hildenbrand * as defined by the mask. 33074e99b3SDavid Hildenbrand */ 34074e99b3SDavid Hildenbrand static inline uint64_t nonzero_search(uint64_t a, uint64_t mask) 35074e99b3SDavid Hildenbrand { 36074e99b3SDavid Hildenbrand return (((a & mask) + mask) | a) & ~mask; 37074e99b3SDavid Hildenbrand } 38074e99b3SDavid Hildenbrand 39074e99b3SDavid Hildenbrand /* 401fd28638SDavid Hildenbrand * Returns the byte offset for the first match, or 16 for no match. 411fd28638SDavid Hildenbrand */ 421fd28638SDavid Hildenbrand static inline int match_index(uint64_t c0, uint64_t c1) 431fd28638SDavid Hildenbrand { 441fd28638SDavid Hildenbrand return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3; 451fd28638SDavid Hildenbrand } 461fd28638SDavid Hildenbrand 471fd28638SDavid Hildenbrand /* 481fd28638SDavid Hildenbrand * Returns the number of bits composing one element. 491fd28638SDavid Hildenbrand */ 501fd28638SDavid Hildenbrand static uint8_t get_element_bits(uint8_t es) 511fd28638SDavid Hildenbrand { 521fd28638SDavid Hildenbrand return (1 << es) * BITS_PER_BYTE; 531fd28638SDavid Hildenbrand } 541fd28638SDavid Hildenbrand 551fd28638SDavid Hildenbrand /* 561fd28638SDavid Hildenbrand * Returns the bitmask for a single element. 571fd28638SDavid Hildenbrand */ 581fd28638SDavid Hildenbrand static uint64_t get_single_element_mask(uint8_t es) 591fd28638SDavid Hildenbrand { 601fd28638SDavid Hildenbrand return -1ull >> (64 - get_element_bits(es)); 611fd28638SDavid Hildenbrand } 621fd28638SDavid Hildenbrand 631fd28638SDavid Hildenbrand /* 641fd28638SDavid Hildenbrand * Returns the bitmask for a single element (excluding the MSB). 651fd28638SDavid Hildenbrand */ 661fd28638SDavid Hildenbrand static uint64_t get_single_element_lsbs_mask(uint8_t es) 671fd28638SDavid Hildenbrand { 681fd28638SDavid Hildenbrand return -1ull >> (65 - get_element_bits(es)); 691fd28638SDavid Hildenbrand } 701fd28638SDavid Hildenbrand 711fd28638SDavid Hildenbrand /* 721fd28638SDavid Hildenbrand * Returns the bitmasks for multiple elements (excluding the MSBs). 731fd28638SDavid Hildenbrand */ 741fd28638SDavid Hildenbrand static uint64_t get_element_lsbs_mask(uint8_t es) 751fd28638SDavid Hildenbrand { 761fd28638SDavid Hildenbrand return dup_const(es, get_single_element_lsbs_mask(es)); 771fd28638SDavid Hildenbrand } 781fd28638SDavid Hildenbrand 791fd28638SDavid Hildenbrand static int vfae(void *v1, const void *v2, const void *v3, bool in, 801fd28638SDavid Hildenbrand bool rt, bool zs, uint8_t es) 811fd28638SDavid Hildenbrand { 821fd28638SDavid Hildenbrand const uint64_t mask = get_element_lsbs_mask(es); 831fd28638SDavid Hildenbrand const int bits = get_element_bits(es); 841fd28638SDavid Hildenbrand uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1; 851fd28638SDavid Hildenbrand uint64_t first_zero = 16; 861fd28638SDavid Hildenbrand uint64_t first_equal; 871fd28638SDavid Hildenbrand int i; 881fd28638SDavid Hildenbrand 891fd28638SDavid Hildenbrand a0 = s390_vec_read_element64(v2, 0); 901fd28638SDavid Hildenbrand a1 = s390_vec_read_element64(v2, 1); 911fd28638SDavid Hildenbrand b0 = s390_vec_read_element64(v3, 0); 921fd28638SDavid Hildenbrand b1 = s390_vec_read_element64(v3, 1); 931fd28638SDavid Hildenbrand e0 = 0; 941fd28638SDavid Hildenbrand e1 = 0; 951fd28638SDavid Hildenbrand /* compare against equality with every other element */ 961fd28638SDavid Hildenbrand for (i = 0; i < 64; i += bits) { 971fd28638SDavid Hildenbrand t0 = rol64(b0, i); 981fd28638SDavid Hildenbrand t1 = rol64(b1, i); 991fd28638SDavid Hildenbrand e0 |= zero_search(a0 ^ t0, mask); 1001fd28638SDavid Hildenbrand e0 |= zero_search(a0 ^ t1, mask); 1011fd28638SDavid Hildenbrand e1 |= zero_search(a1 ^ t0, mask); 1021fd28638SDavid Hildenbrand e1 |= zero_search(a1 ^ t1, mask); 1031fd28638SDavid Hildenbrand } 1041fd28638SDavid Hildenbrand /* invert the result if requested - invert only the MSBs */ 1051fd28638SDavid Hildenbrand if (in) { 1061fd28638SDavid Hildenbrand e0 = ~e0 & ~mask; 1071fd28638SDavid Hildenbrand e1 = ~e1 & ~mask; 1081fd28638SDavid Hildenbrand } 1091fd28638SDavid Hildenbrand first_equal = match_index(e0, e1); 1101fd28638SDavid Hildenbrand 1111fd28638SDavid Hildenbrand if (zs) { 1121fd28638SDavid Hildenbrand z0 = zero_search(a0, mask); 1131fd28638SDavid Hildenbrand z1 = zero_search(a1, mask); 1141fd28638SDavid Hildenbrand first_zero = match_index(z0, z1); 1151fd28638SDavid Hildenbrand } 1161fd28638SDavid Hildenbrand 1171fd28638SDavid Hildenbrand if (rt) { 1181fd28638SDavid Hildenbrand e0 = (e0 >> (bits - 1)) * get_single_element_mask(es); 1191fd28638SDavid Hildenbrand e1 = (e1 >> (bits - 1)) * get_single_element_mask(es); 1201fd28638SDavid Hildenbrand s390_vec_write_element64(v1, 0, e0); 1211fd28638SDavid Hildenbrand s390_vec_write_element64(v1, 1, e1); 1221fd28638SDavid Hildenbrand } else { 1231fd28638SDavid Hildenbrand s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); 1241fd28638SDavid Hildenbrand s390_vec_write_element64(v1, 1, 0); 1251fd28638SDavid Hildenbrand } 1261fd28638SDavid Hildenbrand 1271fd28638SDavid Hildenbrand if (first_zero == 16 && first_equal == 16) { 1281fd28638SDavid Hildenbrand return 3; /* no match */ 1291fd28638SDavid Hildenbrand } else if (first_zero == 16) { 1301fd28638SDavid Hildenbrand return 1; /* matching elements, no match for zero */ 1311fd28638SDavid Hildenbrand } else if (first_equal < first_zero) { 1321fd28638SDavid Hildenbrand return 2; /* matching elements before match for zero */ 1331fd28638SDavid Hildenbrand } 1341fd28638SDavid Hildenbrand return 0; /* match for zero */ 1351fd28638SDavid Hildenbrand } 1361fd28638SDavid Hildenbrand 1371fd28638SDavid Hildenbrand #define DEF_VFAE_HELPER(BITS) \ 1381fd28638SDavid Hildenbrand void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3, \ 1391fd28638SDavid Hildenbrand uint32_t desc) \ 1401fd28638SDavid Hildenbrand { \ 1411fd28638SDavid Hildenbrand const bool in = extract32(simd_data(desc), 3, 1); \ 1421fd28638SDavid Hildenbrand const bool rt = extract32(simd_data(desc), 2, 1); \ 1431fd28638SDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \ 1441fd28638SDavid Hildenbrand \ 1451fd28638SDavid Hildenbrand vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ 1461fd28638SDavid Hildenbrand } 1471fd28638SDavid Hildenbrand DEF_VFAE_HELPER(8) 1481fd28638SDavid Hildenbrand DEF_VFAE_HELPER(16) 1491fd28638SDavid Hildenbrand DEF_VFAE_HELPER(32) 1501fd28638SDavid Hildenbrand 1511fd28638SDavid Hildenbrand #define DEF_VFAE_CC_HELPER(BITS) \ 1521fd28638SDavid Hildenbrand void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3, \ 1531fd28638SDavid Hildenbrand CPUS390XState *env, uint32_t desc) \ 1541fd28638SDavid Hildenbrand { \ 1551fd28638SDavid Hildenbrand const bool in = extract32(simd_data(desc), 3, 1); \ 1561fd28638SDavid Hildenbrand const bool rt = extract32(simd_data(desc), 2, 1); \ 1571fd28638SDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \ 1581fd28638SDavid Hildenbrand \ 1591fd28638SDavid Hildenbrand env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ 1601fd28638SDavid Hildenbrand } 1611fd28638SDavid Hildenbrand DEF_VFAE_CC_HELPER(8) 1621fd28638SDavid Hildenbrand DEF_VFAE_CC_HELPER(16) 1631fd28638SDavid Hildenbrand DEF_VFAE_CC_HELPER(32) 1648c0e1e58SDavid Hildenbrand 1658c0e1e58SDavid Hildenbrand static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) 1668c0e1e58SDavid Hildenbrand { 1678c0e1e58SDavid Hildenbrand const uint64_t mask = get_element_lsbs_mask(es); 1688c0e1e58SDavid Hildenbrand uint64_t a0, a1, b0, b1, e0, e1, z0, z1; 1698c0e1e58SDavid Hildenbrand uint64_t first_zero = 16; 1708c0e1e58SDavid Hildenbrand uint64_t first_equal; 1718c0e1e58SDavid Hildenbrand 1728c0e1e58SDavid Hildenbrand a0 = s390_vec_read_element64(v2, 0); 1738c0e1e58SDavid Hildenbrand a1 = s390_vec_read_element64(v2, 1); 1748c0e1e58SDavid Hildenbrand b0 = s390_vec_read_element64(v3, 0); 1758c0e1e58SDavid Hildenbrand b1 = s390_vec_read_element64(v3, 1); 1768c0e1e58SDavid Hildenbrand e0 = zero_search(a0 ^ b0, mask); 1778c0e1e58SDavid Hildenbrand e1 = zero_search(a1 ^ b1, mask); 1788c0e1e58SDavid Hildenbrand first_equal = match_index(e0, e1); 1798c0e1e58SDavid Hildenbrand 1808c0e1e58SDavid Hildenbrand if (zs) { 1818c0e1e58SDavid Hildenbrand z0 = zero_search(a0, mask); 1828c0e1e58SDavid Hildenbrand z1 = zero_search(a1, mask); 1838c0e1e58SDavid Hildenbrand first_zero = match_index(z0, z1); 1848c0e1e58SDavid Hildenbrand } 1858c0e1e58SDavid Hildenbrand 1868c0e1e58SDavid Hildenbrand s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); 1878c0e1e58SDavid Hildenbrand s390_vec_write_element64(v1, 1, 0); 1888c0e1e58SDavid Hildenbrand if (first_zero == 16 && first_equal == 16) { 1898c0e1e58SDavid Hildenbrand return 3; /* no match */ 1908c0e1e58SDavid Hildenbrand } else if (first_zero == 16) { 1918c0e1e58SDavid Hildenbrand return 1; /* matching elements, no match for zero */ 1928c0e1e58SDavid Hildenbrand } else if (first_equal < first_zero) { 1938c0e1e58SDavid Hildenbrand return 2; /* matching elements before match for zero */ 1948c0e1e58SDavid Hildenbrand } 1958c0e1e58SDavid Hildenbrand return 0; /* match for zero */ 1968c0e1e58SDavid Hildenbrand } 1978c0e1e58SDavid Hildenbrand 1988c0e1e58SDavid Hildenbrand #define DEF_VFEE_HELPER(BITS) \ 1998c0e1e58SDavid Hildenbrand void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3, \ 2008c0e1e58SDavid Hildenbrand uint32_t desc) \ 2018c0e1e58SDavid Hildenbrand { \ 2028c0e1e58SDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \ 2038c0e1e58SDavid Hildenbrand \ 2048c0e1e58SDavid Hildenbrand vfee(v1, v2, v3, zs, MO_##BITS); \ 2058c0e1e58SDavid Hildenbrand } 2068c0e1e58SDavid Hildenbrand DEF_VFEE_HELPER(8) 2078c0e1e58SDavid Hildenbrand DEF_VFEE_HELPER(16) 2088c0e1e58SDavid Hildenbrand DEF_VFEE_HELPER(32) 2098c0e1e58SDavid Hildenbrand 2108c0e1e58SDavid Hildenbrand #define DEF_VFEE_CC_HELPER(BITS) \ 2118c0e1e58SDavid Hildenbrand void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \ 2128c0e1e58SDavid Hildenbrand CPUS390XState *env, uint32_t desc) \ 2138c0e1e58SDavid Hildenbrand { \ 2148c0e1e58SDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \ 2158c0e1e58SDavid Hildenbrand \ 2168c0e1e58SDavid Hildenbrand env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS); \ 2178c0e1e58SDavid Hildenbrand } 2188c0e1e58SDavid Hildenbrand DEF_VFEE_CC_HELPER(8) 2198c0e1e58SDavid Hildenbrand DEF_VFEE_CC_HELPER(16) 2208c0e1e58SDavid Hildenbrand DEF_VFEE_CC_HELPER(32) 221074e99b3SDavid Hildenbrand 222074e99b3SDavid Hildenbrand static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) 223074e99b3SDavid Hildenbrand { 224074e99b3SDavid Hildenbrand const uint64_t mask = get_element_lsbs_mask(es); 225074e99b3SDavid Hildenbrand uint64_t a0, a1, b0, b1, e0, e1, z0, z1; 226074e99b3SDavid Hildenbrand uint64_t first_zero = 16; 227074e99b3SDavid Hildenbrand uint64_t first_inequal; 228074e99b3SDavid Hildenbrand bool smaller = false; 229074e99b3SDavid Hildenbrand 230074e99b3SDavid Hildenbrand a0 = s390_vec_read_element64(v2, 0); 231074e99b3SDavid Hildenbrand a1 = s390_vec_read_element64(v2, 1); 232074e99b3SDavid Hildenbrand b0 = s390_vec_read_element64(v3, 0); 233074e99b3SDavid Hildenbrand b1 = s390_vec_read_element64(v3, 1); 234074e99b3SDavid Hildenbrand e0 = nonzero_search(a0 ^ b0, mask); 235074e99b3SDavid Hildenbrand e1 = nonzero_search(a1 ^ b1, mask); 236074e99b3SDavid Hildenbrand first_inequal = match_index(e0, e1); 237074e99b3SDavid Hildenbrand 238074e99b3SDavid Hildenbrand /* identify the smaller element */ 239074e99b3SDavid Hildenbrand if (first_inequal < 16) { 240074e99b3SDavid Hildenbrand uint8_t enr = first_inequal / (1 << es); 241074e99b3SDavid Hildenbrand uint32_t a = s390_vec_read_element(v2, enr, es); 242074e99b3SDavid Hildenbrand uint32_t b = s390_vec_read_element(v3, enr, es); 243074e99b3SDavid Hildenbrand 244074e99b3SDavid Hildenbrand smaller = a < b; 245074e99b3SDavid Hildenbrand } 246074e99b3SDavid Hildenbrand 247074e99b3SDavid Hildenbrand if (zs) { 248074e99b3SDavid Hildenbrand z0 = zero_search(a0, mask); 249074e99b3SDavid Hildenbrand z1 = zero_search(a1, mask); 250074e99b3SDavid Hildenbrand first_zero = match_index(z0, z1); 251074e99b3SDavid Hildenbrand } 252074e99b3SDavid Hildenbrand 253074e99b3SDavid Hildenbrand s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero)); 254074e99b3SDavid Hildenbrand s390_vec_write_element64(v1, 1, 0); 255074e99b3SDavid Hildenbrand if (first_zero == 16 && first_inequal == 16) { 256074e99b3SDavid Hildenbrand return 3; 257074e99b3SDavid Hildenbrand } else if (first_zero < first_inequal) { 258074e99b3SDavid Hildenbrand return 0; 259074e99b3SDavid Hildenbrand } 260074e99b3SDavid Hildenbrand return smaller ? 1 : 2; 261074e99b3SDavid Hildenbrand } 262074e99b3SDavid Hildenbrand 263074e99b3SDavid Hildenbrand #define DEF_VFENE_HELPER(BITS) \ 264074e99b3SDavid Hildenbrand void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \ 265074e99b3SDavid Hildenbrand uint32_t desc) \ 266074e99b3SDavid Hildenbrand { \ 267074e99b3SDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \ 268074e99b3SDavid Hildenbrand \ 269074e99b3SDavid Hildenbrand vfene(v1, v2, v3, zs, MO_##BITS); \ 270074e99b3SDavid Hildenbrand } 271074e99b3SDavid Hildenbrand DEF_VFENE_HELPER(8) 272074e99b3SDavid Hildenbrand DEF_VFENE_HELPER(16) 273074e99b3SDavid Hildenbrand DEF_VFENE_HELPER(32) 274074e99b3SDavid Hildenbrand 275074e99b3SDavid Hildenbrand #define DEF_VFENE_CC_HELPER(BITS) \ 276074e99b3SDavid Hildenbrand void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \ 277074e99b3SDavid Hildenbrand CPUS390XState *env, uint32_t desc) \ 278074e99b3SDavid Hildenbrand { \ 279074e99b3SDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \ 280074e99b3SDavid Hildenbrand \ 281074e99b3SDavid Hildenbrand env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS); \ 282074e99b3SDavid Hildenbrand } 283074e99b3SDavid Hildenbrand DEF_VFENE_CC_HELPER(8) 284074e99b3SDavid Hildenbrand DEF_VFENE_CC_HELPER(16) 285074e99b3SDavid Hildenbrand DEF_VFENE_CC_HELPER(32) 286be6324c6SDavid Hildenbrand 287be6324c6SDavid Hildenbrand static int vistr(void *v1, const void *v2, uint8_t es) 288be6324c6SDavid Hildenbrand { 289be6324c6SDavid Hildenbrand const uint64_t mask = get_element_lsbs_mask(es); 290be6324c6SDavid Hildenbrand uint64_t a0 = s390_vec_read_element64(v2, 0); 291be6324c6SDavid Hildenbrand uint64_t a1 = s390_vec_read_element64(v2, 1); 292be6324c6SDavid Hildenbrand uint64_t z; 293be6324c6SDavid Hildenbrand int cc = 3; 294be6324c6SDavid Hildenbrand 295be6324c6SDavid Hildenbrand z = zero_search(a0, mask); 296be6324c6SDavid Hildenbrand if (z) { 297be6324c6SDavid Hildenbrand a0 &= ~(-1ull >> clz64(z)); 298be6324c6SDavid Hildenbrand a1 = 0; 299be6324c6SDavid Hildenbrand cc = 0; 300be6324c6SDavid Hildenbrand } else { 301be6324c6SDavid Hildenbrand z = zero_search(a1, mask); 302be6324c6SDavid Hildenbrand if (z) { 303be6324c6SDavid Hildenbrand a1 &= ~(-1ull >> clz64(z)); 304be6324c6SDavid Hildenbrand cc = 0; 305be6324c6SDavid Hildenbrand } 306be6324c6SDavid Hildenbrand } 307be6324c6SDavid Hildenbrand 308be6324c6SDavid Hildenbrand s390_vec_write_element64(v1, 0, a0); 309be6324c6SDavid Hildenbrand s390_vec_write_element64(v1, 1, a1); 310be6324c6SDavid Hildenbrand return cc; 311be6324c6SDavid Hildenbrand } 312be6324c6SDavid Hildenbrand 313be6324c6SDavid Hildenbrand #define DEF_VISTR_HELPER(BITS) \ 314be6324c6SDavid Hildenbrand void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc) \ 315be6324c6SDavid Hildenbrand { \ 316be6324c6SDavid Hildenbrand vistr(v1, v2, MO_##BITS); \ 317be6324c6SDavid Hildenbrand } 318be6324c6SDavid Hildenbrand DEF_VISTR_HELPER(8) 319be6324c6SDavid Hildenbrand DEF_VISTR_HELPER(16) 320be6324c6SDavid Hildenbrand DEF_VISTR_HELPER(32) 321be6324c6SDavid Hildenbrand 322be6324c6SDavid Hildenbrand #define DEF_VISTR_CC_HELPER(BITS) \ 323be6324c6SDavid Hildenbrand void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \ 324be6324c6SDavid Hildenbrand uint32_t desc) \ 325be6324c6SDavid Hildenbrand { \ 326be6324c6SDavid Hildenbrand env->cc_op = vistr(v1, v2, MO_##BITS); \ 327be6324c6SDavid Hildenbrand } 328be6324c6SDavid Hildenbrand DEF_VISTR_CC_HELPER(8) 329be6324c6SDavid Hildenbrand DEF_VISTR_CC_HELPER(16) 330be6324c6SDavid Hildenbrand DEF_VISTR_CC_HELPER(32) 33113b0228fSDavid Hildenbrand 33213b0228fSDavid Hildenbrand static bool element_compare(uint32_t data, uint32_t l, uint8_t c) 33313b0228fSDavid Hildenbrand { 33413b0228fSDavid Hildenbrand const bool equal = extract32(c, 7, 1); 33513b0228fSDavid Hildenbrand const bool lower = extract32(c, 6, 1); 33613b0228fSDavid Hildenbrand const bool higher = extract32(c, 5, 1); 33713b0228fSDavid Hildenbrand 33813b0228fSDavid Hildenbrand if (data < l) { 33913b0228fSDavid Hildenbrand return lower; 34013b0228fSDavid Hildenbrand } else if (data > l) { 34113b0228fSDavid Hildenbrand return higher; 34213b0228fSDavid Hildenbrand } 34313b0228fSDavid Hildenbrand return equal; 34413b0228fSDavid Hildenbrand } 34513b0228fSDavid Hildenbrand 34613b0228fSDavid Hildenbrand static int vstrc(void *v1, const void *v2, const void *v3, const void *v4, 34713b0228fSDavid Hildenbrand bool in, bool rt, bool zs, uint8_t es) 34813b0228fSDavid Hildenbrand { 34913b0228fSDavid Hildenbrand const uint64_t mask = get_element_lsbs_mask(es); 35013b0228fSDavid Hildenbrand uint64_t a0 = s390_vec_read_element64(v2, 0); 35113b0228fSDavid Hildenbrand uint64_t a1 = s390_vec_read_element64(v2, 1); 35213b0228fSDavid Hildenbrand int first_zero = 16, first_match = 16; 35313b0228fSDavid Hildenbrand S390Vector rt_result = {}; 35413b0228fSDavid Hildenbrand uint64_t z0, z1; 35513b0228fSDavid Hildenbrand int i, j; 35613b0228fSDavid Hildenbrand 35713b0228fSDavid Hildenbrand if (zs) { 35813b0228fSDavid Hildenbrand z0 = zero_search(a0, mask); 35913b0228fSDavid Hildenbrand z1 = zero_search(a1, mask); 36013b0228fSDavid Hildenbrand first_zero = match_index(z0, z1); 36113b0228fSDavid Hildenbrand } 36213b0228fSDavid Hildenbrand 36313b0228fSDavid Hildenbrand for (i = 0; i < 16 / (1 << es); i++) { 36413b0228fSDavid Hildenbrand const uint32_t data = s390_vec_read_element(v2, i, es); 36513b0228fSDavid Hildenbrand const int cur_byte = i * (1 << es); 36613b0228fSDavid Hildenbrand bool any_match = false; 36713b0228fSDavid Hildenbrand 36813b0228fSDavid Hildenbrand /* if we don't need a bit vector, we can stop early */ 36913b0228fSDavid Hildenbrand if (cur_byte == first_zero && !rt) { 37013b0228fSDavid Hildenbrand break; 37113b0228fSDavid Hildenbrand } 37213b0228fSDavid Hildenbrand 37313b0228fSDavid Hildenbrand for (j = 0; j < 16 / (1 << es); j += 2) { 37413b0228fSDavid Hildenbrand const uint32_t l1 = s390_vec_read_element(v3, j, es); 37513b0228fSDavid Hildenbrand const uint32_t l2 = s390_vec_read_element(v3, j + 1, es); 37613b0228fSDavid Hildenbrand /* we are only interested in the highest byte of each element */ 37713b0228fSDavid Hildenbrand const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es)); 37813b0228fSDavid Hildenbrand const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es)); 37913b0228fSDavid Hildenbrand 38013b0228fSDavid Hildenbrand if (element_compare(data, l1, c1) && 38113b0228fSDavid Hildenbrand element_compare(data, l2, c2)) { 38213b0228fSDavid Hildenbrand any_match = true; 38313b0228fSDavid Hildenbrand break; 38413b0228fSDavid Hildenbrand } 38513b0228fSDavid Hildenbrand } 38613b0228fSDavid Hildenbrand /* invert the result if requested */ 38713b0228fSDavid Hildenbrand any_match = in ^ any_match; 38813b0228fSDavid Hildenbrand 38913b0228fSDavid Hildenbrand if (any_match) { 39013b0228fSDavid Hildenbrand /* indicate bit vector if requested */ 39113b0228fSDavid Hildenbrand if (rt) { 39213b0228fSDavid Hildenbrand const uint64_t val = -1ull; 39313b0228fSDavid Hildenbrand 39413b0228fSDavid Hildenbrand first_match = MIN(cur_byte, first_match); 39513b0228fSDavid Hildenbrand s390_vec_write_element(&rt_result, i, es, val); 39613b0228fSDavid Hildenbrand } else { 39713b0228fSDavid Hildenbrand /* stop on the first match */ 39813b0228fSDavid Hildenbrand first_match = cur_byte; 39913b0228fSDavid Hildenbrand break; 40013b0228fSDavid Hildenbrand } 40113b0228fSDavid Hildenbrand } 40213b0228fSDavid Hildenbrand } 40313b0228fSDavid Hildenbrand 40413b0228fSDavid Hildenbrand if (rt) { 40513b0228fSDavid Hildenbrand *(S390Vector *)v1 = rt_result; 40613b0228fSDavid Hildenbrand } else { 40713b0228fSDavid Hildenbrand s390_vec_write_element64(v1, 0, MIN(first_match, first_zero)); 40813b0228fSDavid Hildenbrand s390_vec_write_element64(v1, 1, 0); 40913b0228fSDavid Hildenbrand } 41013b0228fSDavid Hildenbrand 41113b0228fSDavid Hildenbrand if (first_zero == 16 && first_match == 16) { 41213b0228fSDavid Hildenbrand return 3; /* no match */ 41313b0228fSDavid Hildenbrand } else if (first_zero == 16) { 41413b0228fSDavid Hildenbrand return 1; /* matching elements, no match for zero */ 41513b0228fSDavid Hildenbrand } else if (first_match < first_zero) { 41613b0228fSDavid Hildenbrand return 2; /* matching elements before match for zero */ 41713b0228fSDavid Hildenbrand } 41813b0228fSDavid Hildenbrand return 0; /* match for zero */ 41913b0228fSDavid Hildenbrand } 42013b0228fSDavid Hildenbrand 42113b0228fSDavid Hildenbrand #define DEF_VSTRC_HELPER(BITS) \ 42213b0228fSDavid Hildenbrand void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3, \ 42313b0228fSDavid Hildenbrand const void *v4, uint32_t desc) \ 42413b0228fSDavid Hildenbrand { \ 42513b0228fSDavid Hildenbrand const bool in = extract32(simd_data(desc), 3, 1); \ 42613b0228fSDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \ 42713b0228fSDavid Hildenbrand \ 42813b0228fSDavid Hildenbrand vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \ 42913b0228fSDavid Hildenbrand } 43013b0228fSDavid Hildenbrand DEF_VSTRC_HELPER(8) 43113b0228fSDavid Hildenbrand DEF_VSTRC_HELPER(16) 43213b0228fSDavid Hildenbrand DEF_VSTRC_HELPER(32) 43313b0228fSDavid Hildenbrand 43413b0228fSDavid Hildenbrand #define DEF_VSTRC_RT_HELPER(BITS) \ 43513b0228fSDavid Hildenbrand void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3, \ 43613b0228fSDavid Hildenbrand const void *v4, uint32_t desc) \ 43713b0228fSDavid Hildenbrand { \ 43813b0228fSDavid Hildenbrand const bool in = extract32(simd_data(desc), 3, 1); \ 43913b0228fSDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \ 44013b0228fSDavid Hildenbrand \ 44113b0228fSDavid Hildenbrand vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \ 44213b0228fSDavid Hildenbrand } 44313b0228fSDavid Hildenbrand DEF_VSTRC_RT_HELPER(8) 44413b0228fSDavid Hildenbrand DEF_VSTRC_RT_HELPER(16) 44513b0228fSDavid Hildenbrand DEF_VSTRC_RT_HELPER(32) 44613b0228fSDavid Hildenbrand 44713b0228fSDavid Hildenbrand #define DEF_VSTRC_CC_HELPER(BITS) \ 44813b0228fSDavid Hildenbrand void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3, \ 44913b0228fSDavid Hildenbrand const void *v4, CPUS390XState *env, \ 45013b0228fSDavid Hildenbrand uint32_t desc) \ 45113b0228fSDavid Hildenbrand { \ 45213b0228fSDavid Hildenbrand const bool in = extract32(simd_data(desc), 3, 1); \ 45313b0228fSDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \ 45413b0228fSDavid Hildenbrand \ 45513b0228fSDavid Hildenbrand env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \ 45613b0228fSDavid Hildenbrand } 45713b0228fSDavid Hildenbrand DEF_VSTRC_CC_HELPER(8) 45813b0228fSDavid Hildenbrand DEF_VSTRC_CC_HELPER(16) 45913b0228fSDavid Hildenbrand DEF_VSTRC_CC_HELPER(32) 46013b0228fSDavid Hildenbrand 46113b0228fSDavid Hildenbrand #define DEF_VSTRC_CC_RT_HELPER(BITS) \ 46213b0228fSDavid Hildenbrand void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3, \ 46313b0228fSDavid Hildenbrand const void *v4, CPUS390XState *env, \ 46413b0228fSDavid Hildenbrand uint32_t desc) \ 46513b0228fSDavid Hildenbrand { \ 46613b0228fSDavid Hildenbrand const bool in = extract32(simd_data(desc), 3, 1); \ 46713b0228fSDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \ 46813b0228fSDavid Hildenbrand \ 46913b0228fSDavid Hildenbrand env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \ 47013b0228fSDavid Hildenbrand } 47113b0228fSDavid Hildenbrand DEF_VSTRC_CC_RT_HELPER(8) 47213b0228fSDavid Hildenbrand DEF_VSTRC_CC_RT_HELPER(16) 47313b0228fSDavid Hildenbrand DEF_VSTRC_CC_RT_HELPER(32) 474