/*
 * QEMU TCG support -- s390x vector string instruction support
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
121fd28638SDavid Hildenbrand #include "qemu/osdep.h"
131fd28638SDavid Hildenbrand #include "cpu.h"
14b6b47223SCho, Yu-Chen #include "s390x-internal.h"
151fd28638SDavid Hildenbrand #include "vec.h"
161fd28638SDavid Hildenbrand #include "tcg/tcg.h"
171fd28638SDavid Hildenbrand #include "tcg/tcg-gvec-desc.h"
181fd28638SDavid Hildenbrand #include "exec/helper-proto.h"
191fd28638SDavid Hildenbrand
/*
 * Returns a bit set in the MSB of each element that is zero,
 * as defined by the mask.
 *
 * @a: packed elements to test.
 * @mask: per-element mask with every bit except the element MSB set
 *        (e.g. 0x7f7f7f7f7f7f7f7f for byte elements).
 *
 * Adding the mask to the masked value carries into an element's MSB
 * exactly when one of its lower bits is set; OR-ing in @a also covers an
 * MSB that was set to begin with. After the final inversion only the MSBs
 * of all-zero elements remain set.
 */
static inline uint64_t zero_search(uint64_t a, uint64_t mask)
{
    return ~(((a & mask) + mask) | a | mask);
}

/*
 * Returns a bit set in the MSB of each element that is not zero,
 * as defined by the mask.
 *
 * @a: packed elements to test.
 * @mask: per-element mask with every bit except the element MSB set
 *        (e.g. 0x7f7f7f7f7f7f7f7f for byte elements).
 *
 * Adding the mask to the masked value carries into an element's MSB
 * exactly when one of its lower bits is set; OR-ing in @a also covers an
 * MSB that was set to begin with. The final AND keeps only the MSBs.
 */
static inline uint64_t nonzero_search(uint64_t a, uint64_t mask)
{
    return (((a & mask) + mask) | a) & ~mask;
}

/*
 * Returns the byte offset for the first match, or 16 for no match.
 *
 * @c0: match bits for bytes 0-7; the most significant byte is byte 0.
 * @c1: match bits for bytes 8-15, laid out the same way.
 *
 * The number of leading zero bits divided by 8 is the byte offset of the
 * first set bit. The "16 for no match" result relies on clz64(0) yielding
 * 64 (NOTE(review): clz64() is defined outside this file -- confirm).
 */
static inline int match_index(uint64_t c0, uint64_t c1)
{
    return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3;
}

461fd28638SDavid Hildenbrand /*
471fd28638SDavid Hildenbrand * Returns the number of bits composing one element.
481fd28638SDavid Hildenbrand */
get_element_bits(uint8_t es)491fd28638SDavid Hildenbrand static uint8_t get_element_bits(uint8_t es)
501fd28638SDavid Hildenbrand {
511fd28638SDavid Hildenbrand return (1 << es) * BITS_PER_BYTE;
521fd28638SDavid Hildenbrand }
531fd28638SDavid Hildenbrand
/*
 * Returns the bitmask for a single element.
 *
 * @es: element size as log2 of the size in bytes.
 *
 * The mask covers the least significant element of a 64-bit value, e.g.
 * 0xff for byte elements.
 */
static uint64_t get_single_element_mask(uint8_t es)
{
    return -1ull >> (64 - get_element_bits(es));
}

/*
 * Returns the bitmask for a single element (excluding the MSB).
 *
 * @es: element size as log2 of the size in bytes.
 *
 * Shifting by one bit more than for the full element mask drops the
 * element's MSB, e.g. 0x7f for byte elements.
 */
static uint64_t get_single_element_lsbs_mask(uint8_t es)
{
    return -1ull >> (65 - get_element_bits(es));
}

/*
 * Returns the bitmasks for multiple elements (excluding the MSBs).
 *
 * @es: element size as log2 of the size in bytes.
 *
 * The single-element mask is handed to dup_const(), which presumably
 * replicates it into every element of the 64-bit result (dup_const() is
 * defined outside this file -- confirm). The result is the mask expected
 * by zero_search() and nonzero_search().
 */
static uint64_t get_element_lsbs_mask(uint8_t es)
{
    return dup_const(es, get_single_element_lsbs_mask(es));
}

/*
 * Finds the elements of @v2 that are equal to any element of @v3.
 *
 * @v1: result vector.
 * @v2: vector whose elements are searched.
 * @v3: vector holding the elements to compare against.
 * @in: invert the per-element match result.
 * @rt: if set, write a bit vector (matching elements become all ones, the
 *      others zero); otherwise write the byte index of the first match or
 *      zero element into doubleword 0 and clear doubleword 1.
 * @zs: also search @v2 for a zero element.
 * @es: element size as log2 of the size in bytes.
 *
 * Returns the condition code:
 *   0 - match for zero
 *   1 - matching elements, no match for zero
 *   2 - matching elements before match for zero
 *   3 - no match
 */
static int vfae(void *v1, const void *v2, const void *v3, bool in,
                bool rt, bool zs, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    const int bits = get_element_bits(es);
    uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1;
    uint64_t first_zero = 16;
    uint64_t first_equal;
    int i;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);
    e0 = 0;
    e1 = 0;
    /*
     * compare against equality with every other element: rotating both
     * halves of v3 by one element per iteration lines every element of v3
     * up with every element position of v2 once
     */
    for (i = 0; i < 64; i += bits) {
        t0 = rol64(b0, i);
        t1 = rol64(b1, i);
        e0 |= zero_search(a0 ^ t0, mask);
        e0 |= zero_search(a0 ^ t1, mask);
        e1 |= zero_search(a1 ^ t0, mask);
        e1 |= zero_search(a1 ^ t1, mask);
    }
    /* invert the result if requested - invert only the MSBs */
    if (in) {
        e0 = ~e0 & ~mask;
        e1 = ~e1 & ~mask;
    }
    first_equal = match_index(e0, e1);

    if (zs) {
        z0 = zero_search(a0, mask);
        z1 = zero_search(a1, mask);
        first_zero = match_index(z0, z1);
    }

    if (rt) {
        /*
         * move each match MSB to the LSB of its element and multiply by
         * the element mask to expand it to an all-ones element
         */
        e0 = (e0 >> (bits - 1)) * get_single_element_mask(es);
        e1 = (e1 >> (bits - 1)) * get_single_element_mask(es);
        s390_vec_write_element64(v1, 0, e0);
        s390_vec_write_element64(v1, 1, e1);
    } else {
        s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
        s390_vec_write_element64(v1, 1, 0);
    }

    if (first_zero == 16 && first_equal == 16) {
        return 3; /* no match */
    } else if (first_zero == 16) {
        return 1; /* matching elements, no match for zero */
    } else if (first_equal < first_zero) {
        return 2; /* matching elements before match for zero */
    }
    return 0; /* match for zero */
}

1361fd28638SDavid Hildenbrand #define DEF_VFAE_HELPER(BITS) \
1371fd28638SDavid Hildenbrand void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3, \
1381fd28638SDavid Hildenbrand uint32_t desc) \
1391fd28638SDavid Hildenbrand { \
1401fd28638SDavid Hildenbrand const bool in = extract32(simd_data(desc), 3, 1); \
1411fd28638SDavid Hildenbrand const bool rt = extract32(simd_data(desc), 2, 1); \
1421fd28638SDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \
1431fd28638SDavid Hildenbrand \
1441fd28638SDavid Hildenbrand vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \
1451fd28638SDavid Hildenbrand }
1461fd28638SDavid Hildenbrand DEF_VFAE_HELPER(8)
1471fd28638SDavid Hildenbrand DEF_VFAE_HELPER(16)
1481fd28638SDavid Hildenbrand DEF_VFAE_HELPER(32)
1491fd28638SDavid Hildenbrand
1501fd28638SDavid Hildenbrand #define DEF_VFAE_CC_HELPER(BITS) \
1511fd28638SDavid Hildenbrand void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3, \
1521fd28638SDavid Hildenbrand CPUS390XState *env, uint32_t desc) \
1531fd28638SDavid Hildenbrand { \
1541fd28638SDavid Hildenbrand const bool in = extract32(simd_data(desc), 3, 1); \
1551fd28638SDavid Hildenbrand const bool rt = extract32(simd_data(desc), 2, 1); \
1561fd28638SDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \
1571fd28638SDavid Hildenbrand \
1581fd28638SDavid Hildenbrand env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \
1591fd28638SDavid Hildenbrand }
1601fd28638SDavid Hildenbrand DEF_VFAE_CC_HELPER(8)
1611fd28638SDavid Hildenbrand DEF_VFAE_CC_HELPER(16)
1621fd28638SDavid Hildenbrand DEF_VFAE_CC_HELPER(32)
1638c0e1e58SDavid Hildenbrand
/*
 * Finds the first element position at which @v2 and @v3 are equal.
 *
 * @v1: result vector; doubleword 0 receives the byte index of the first
 *      equal element or zero element, whichever comes first (16 if there
 *      is neither), doubleword 1 is cleared.
 * @v2: first operand; also the vector searched for a zero element.
 * @v3: second operand.
 * @zs: also search @v2 for a zero element.
 * @es: element size as log2 of the size in bytes.
 *
 * Returns the condition code:
 *   0 - match for zero
 *   1 - matching elements, no match for zero
 *   2 - matching elements before match for zero
 *   3 - no match
 */
static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
    uint64_t first_zero = 16;
    uint64_t first_equal;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);
    /* equal elements XOR to zero, so search the XOR for zero elements */
    e0 = zero_search(a0 ^ b0, mask);
    e1 = zero_search(a1 ^ b1, mask);
    first_equal = match_index(e0, e1);

    if (zs) {
        z0 = zero_search(a0, mask);
        z1 = zero_search(a1, mask);
        first_zero = match_index(z0, z1);
    }

    s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
    s390_vec_write_element64(v1, 1, 0);
    if (first_zero == 16 && first_equal == 16) {
        return 3; /* no match */
    } else if (first_zero == 16) {
        return 1; /* matching elements, no match for zero */
    } else if (first_equal < first_zero) {
        return 2; /* matching elements before match for zero */
    }
    return 0; /* match for zero */
}

1978c0e1e58SDavid Hildenbrand #define DEF_VFEE_HELPER(BITS) \
1988c0e1e58SDavid Hildenbrand void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3, \
1998c0e1e58SDavid Hildenbrand uint32_t desc) \
2008c0e1e58SDavid Hildenbrand { \
2018c0e1e58SDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \
2028c0e1e58SDavid Hildenbrand \
2038c0e1e58SDavid Hildenbrand vfee(v1, v2, v3, zs, MO_##BITS); \
2048c0e1e58SDavid Hildenbrand }
2058c0e1e58SDavid Hildenbrand DEF_VFEE_HELPER(8)
2068c0e1e58SDavid Hildenbrand DEF_VFEE_HELPER(16)
2078c0e1e58SDavid Hildenbrand DEF_VFEE_HELPER(32)
2088c0e1e58SDavid Hildenbrand
2098c0e1e58SDavid Hildenbrand #define DEF_VFEE_CC_HELPER(BITS) \
2108c0e1e58SDavid Hildenbrand void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \
2118c0e1e58SDavid Hildenbrand CPUS390XState *env, uint32_t desc) \
2128c0e1e58SDavid Hildenbrand { \
2138c0e1e58SDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \
2148c0e1e58SDavid Hildenbrand \
2158c0e1e58SDavid Hildenbrand env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS); \
2168c0e1e58SDavid Hildenbrand }
2178c0e1e58SDavid Hildenbrand DEF_VFEE_CC_HELPER(8)
2188c0e1e58SDavid Hildenbrand DEF_VFEE_CC_HELPER(16)
2198c0e1e58SDavid Hildenbrand DEF_VFEE_CC_HELPER(32)
220074e99b3SDavid Hildenbrand
/*
 * Finds the first element position at which @v2 and @v3 differ.
 *
 * @v1: result vector; doubleword 0 receives the byte index of the first
 *      unequal element or zero element, whichever comes first (16 if there
 *      is neither), doubleword 1 is cleared.
 * @v2: first operand; also the vector searched for a zero element.
 * @v3: second operand.
 * @zs: also search @v2 for a zero element.
 * @es: element size as log2 of the size in bytes.
 *
 * Returns the condition code:
 *   0 - a zero element was found before any unequal element
 *   1 - at the first unequal element, the element of @v2 is smaller
 *   2 - at the first unequal element, the element of @v2 is not smaller
 *   3 - neither an unequal nor a zero element was found
 */
static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
    uint64_t first_zero = 16;
    uint64_t first_inequal;
    bool smaller = false;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);
    /* unequal elements XOR to a nonzero value */
    e0 = nonzero_search(a0 ^ b0, mask);
    e1 = nonzero_search(a1 ^ b1, mask);
    first_inequal = match_index(e0, e1);

    /* identify the smaller element */
    if (first_inequal < 16) {
        /* convert the byte index into an element number */
        uint8_t enr = first_inequal / (1 << es);
        uint32_t a = s390_vec_read_element(v2, enr, es);
        uint32_t b = s390_vec_read_element(v3, enr, es);

        smaller = a < b;
    }

    if (zs) {
        z0 = zero_search(a0, mask);
        z1 = zero_search(a1, mask);
        first_zero = match_index(z0, z1);
    }

    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
    s390_vec_write_element64(v1, 1, 0);
    if (first_zero == 16 && first_inequal == 16) {
        return 3;
    } else if (first_zero < first_inequal) {
        return 0;
    }
    /* here first_inequal < 16, so "smaller" has been computed above */
    return smaller ? 1 : 2;
}

262074e99b3SDavid Hildenbrand #define DEF_VFENE_HELPER(BITS) \
263074e99b3SDavid Hildenbrand void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \
264074e99b3SDavid Hildenbrand uint32_t desc) \
265074e99b3SDavid Hildenbrand { \
266074e99b3SDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \
267074e99b3SDavid Hildenbrand \
268074e99b3SDavid Hildenbrand vfene(v1, v2, v3, zs, MO_##BITS); \
269074e99b3SDavid Hildenbrand }
270074e99b3SDavid Hildenbrand DEF_VFENE_HELPER(8)
271074e99b3SDavid Hildenbrand DEF_VFENE_HELPER(16)
272074e99b3SDavid Hildenbrand DEF_VFENE_HELPER(32)
273074e99b3SDavid Hildenbrand
274074e99b3SDavid Hildenbrand #define DEF_VFENE_CC_HELPER(BITS) \
275074e99b3SDavid Hildenbrand void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \
276074e99b3SDavid Hildenbrand CPUS390XState *env, uint32_t desc) \
277074e99b3SDavid Hildenbrand { \
278074e99b3SDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \
279074e99b3SDavid Hildenbrand \
280074e99b3SDavid Hildenbrand env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS); \
281074e99b3SDavid Hildenbrand }
282074e99b3SDavid Hildenbrand DEF_VFENE_CC_HELPER(8)
283074e99b3SDavid Hildenbrand DEF_VFENE_CC_HELPER(16)
284074e99b3SDavid Hildenbrand DEF_VFENE_CC_HELPER(32)
285be6324c6SDavid Hildenbrand
/*
 * Copies @v2 to @v1, clearing everything from the first zero element on.
 *
 * @v1: result vector.
 * @v2: source vector.
 * @es: element size as log2 of the size in bytes.
 *
 * Returns the condition code: 0 if a zero element was found, 3 otherwise
 * (in which case @v2 is copied unmodified).
 */
static int vistr(void *v1, const void *v2, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    uint64_t a0 = s390_vec_read_element64(v2, 0);
    uint64_t a1 = s390_vec_read_element64(v2, 1);
    uint64_t z;
    int cc = 3;

    z = zero_search(a0, mask);
    if (z) {
        /* keep only the bits to the left of the first zero element */
        a0 &= ~(-1ull >> clz64(z));
        a1 = 0;
        cc = 0;
    } else {
        z = zero_search(a1, mask);
        if (z) {
            a1 &= ~(-1ull >> clz64(z));
            cc = 0;
        }
    }

    s390_vec_write_element64(v1, 0, a0);
    s390_vec_write_element64(v1, 1, a1);
    return cc;
}

312be6324c6SDavid Hildenbrand #define DEF_VISTR_HELPER(BITS) \
313be6324c6SDavid Hildenbrand void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc) \
314be6324c6SDavid Hildenbrand { \
315be6324c6SDavid Hildenbrand vistr(v1, v2, MO_##BITS); \
316be6324c6SDavid Hildenbrand }
317be6324c6SDavid Hildenbrand DEF_VISTR_HELPER(8)
318be6324c6SDavid Hildenbrand DEF_VISTR_HELPER(16)
319be6324c6SDavid Hildenbrand DEF_VISTR_HELPER(32)
320be6324c6SDavid Hildenbrand
321be6324c6SDavid Hildenbrand #define DEF_VISTR_CC_HELPER(BITS) \
322be6324c6SDavid Hildenbrand void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
323be6324c6SDavid Hildenbrand uint32_t desc) \
324be6324c6SDavid Hildenbrand { \
325be6324c6SDavid Hildenbrand env->cc_op = vistr(v1, v2, MO_##BITS); \
326be6324c6SDavid Hildenbrand }
327be6324c6SDavid Hildenbrand DEF_VISTR_CC_HELPER(8)
328be6324c6SDavid Hildenbrand DEF_VISTR_CC_HELPER(16)
329be6324c6SDavid Hildenbrand DEF_VISTR_CC_HELPER(32)
33013b0228fSDavid Hildenbrand
/*
 * Evaluates one range comparison of @data against the limit @l.
 *
 * @data: element value under test.
 * @l: value to compare against.
 * @c: control byte; bit 7 selects "equal", bit 6 "lower" and bit 5 "higher"
 *     (bit numbers as understood by extract32(), i.e. bit 0 is the LSB).
 *
 * Returns the control flag that corresponds to the actual relation between
 * @data and @l, i.e. true if that relation is one of the selected ones.
 */
static bool element_compare(uint32_t data, uint32_t l, uint8_t c)
{
    const bool equal = extract32(c, 7, 1);
    const bool lower = extract32(c, 6, 1);
    const bool higher = extract32(c, 5, 1);

    if (data < l) {
        return lower;
    } else if (data > l) {
        return higher;
    }
    return equal;
}

vstrc(void * v1,const void * v2,const void * v3,const void * v4,bool in,bool rt,bool zs,uint8_t es)34513b0228fSDavid Hildenbrand static int vstrc(void *v1, const void *v2, const void *v3, const void *v4,
34613b0228fSDavid Hildenbrand bool in, bool rt, bool zs, uint8_t es)
34713b0228fSDavid Hildenbrand {
34813b0228fSDavid Hildenbrand const uint64_t mask = get_element_lsbs_mask(es);
34913b0228fSDavid Hildenbrand uint64_t a0 = s390_vec_read_element64(v2, 0);
35013b0228fSDavid Hildenbrand uint64_t a1 = s390_vec_read_element64(v2, 1);
35113b0228fSDavid Hildenbrand int first_zero = 16, first_match = 16;
35213b0228fSDavid Hildenbrand S390Vector rt_result = {};
35313b0228fSDavid Hildenbrand uint64_t z0, z1;
35413b0228fSDavid Hildenbrand int i, j;
35513b0228fSDavid Hildenbrand
35613b0228fSDavid Hildenbrand if (zs) {
35713b0228fSDavid Hildenbrand z0 = zero_search(a0, mask);
35813b0228fSDavid Hildenbrand z1 = zero_search(a1, mask);
35913b0228fSDavid Hildenbrand first_zero = match_index(z0, z1);
36013b0228fSDavid Hildenbrand }
36113b0228fSDavid Hildenbrand
36213b0228fSDavid Hildenbrand for (i = 0; i < 16 / (1 << es); i++) {
36313b0228fSDavid Hildenbrand const uint32_t data = s390_vec_read_element(v2, i, es);
36413b0228fSDavid Hildenbrand const int cur_byte = i * (1 << es);
36513b0228fSDavid Hildenbrand bool any_match = false;
36613b0228fSDavid Hildenbrand
36713b0228fSDavid Hildenbrand /* if we don't need a bit vector, we can stop early */
36813b0228fSDavid Hildenbrand if (cur_byte == first_zero && !rt) {
36913b0228fSDavid Hildenbrand break;
37013b0228fSDavid Hildenbrand }
37113b0228fSDavid Hildenbrand
37213b0228fSDavid Hildenbrand for (j = 0; j < 16 / (1 << es); j += 2) {
37313b0228fSDavid Hildenbrand const uint32_t l1 = s390_vec_read_element(v3, j, es);
37413b0228fSDavid Hildenbrand const uint32_t l2 = s390_vec_read_element(v3, j + 1, es);
37513b0228fSDavid Hildenbrand /* we are only interested in the highest byte of each element */
37613b0228fSDavid Hildenbrand const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es));
37713b0228fSDavid Hildenbrand const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es));
37813b0228fSDavid Hildenbrand
37913b0228fSDavid Hildenbrand if (element_compare(data, l1, c1) &&
38013b0228fSDavid Hildenbrand element_compare(data, l2, c2)) {
38113b0228fSDavid Hildenbrand any_match = true;
38213b0228fSDavid Hildenbrand break;
38313b0228fSDavid Hildenbrand }
38413b0228fSDavid Hildenbrand }
38513b0228fSDavid Hildenbrand /* invert the result if requested */
38613b0228fSDavid Hildenbrand any_match = in ^ any_match;
38713b0228fSDavid Hildenbrand
38813b0228fSDavid Hildenbrand if (any_match) {
38913b0228fSDavid Hildenbrand /* indicate bit vector if requested */
39013b0228fSDavid Hildenbrand if (rt) {
39113b0228fSDavid Hildenbrand const uint64_t val = -1ull;
39213b0228fSDavid Hildenbrand
39313b0228fSDavid Hildenbrand first_match = MIN(cur_byte, first_match);
39413b0228fSDavid Hildenbrand s390_vec_write_element(&rt_result, i, es, val);
39513b0228fSDavid Hildenbrand } else {
39613b0228fSDavid Hildenbrand /* stop on the first match */
39713b0228fSDavid Hildenbrand first_match = cur_byte;
39813b0228fSDavid Hildenbrand break;
39913b0228fSDavid Hildenbrand }
40013b0228fSDavid Hildenbrand }
40113b0228fSDavid Hildenbrand }
40213b0228fSDavid Hildenbrand
40313b0228fSDavid Hildenbrand if (rt) {
40413b0228fSDavid Hildenbrand *(S390Vector *)v1 = rt_result;
40513b0228fSDavid Hildenbrand } else {
40613b0228fSDavid Hildenbrand s390_vec_write_element64(v1, 0, MIN(first_match, first_zero));
40713b0228fSDavid Hildenbrand s390_vec_write_element64(v1, 1, 0);
40813b0228fSDavid Hildenbrand }
40913b0228fSDavid Hildenbrand
41013b0228fSDavid Hildenbrand if (first_zero == 16 && first_match == 16) {
41113b0228fSDavid Hildenbrand return 3; /* no match */
41213b0228fSDavid Hildenbrand } else if (first_zero == 16) {
41313b0228fSDavid Hildenbrand return 1; /* matching elements, no match for zero */
41413b0228fSDavid Hildenbrand } else if (first_match < first_zero) {
41513b0228fSDavid Hildenbrand return 2; /* matching elements before match for zero */
41613b0228fSDavid Hildenbrand }
41713b0228fSDavid Hildenbrand return 0; /* match for zero */
41813b0228fSDavid Hildenbrand }
41913b0228fSDavid Hildenbrand
42013b0228fSDavid Hildenbrand #define DEF_VSTRC_HELPER(BITS) \
42113b0228fSDavid Hildenbrand void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3, \
42213b0228fSDavid Hildenbrand const void *v4, uint32_t desc) \
42313b0228fSDavid Hildenbrand { \
42413b0228fSDavid Hildenbrand const bool in = extract32(simd_data(desc), 3, 1); \
42513b0228fSDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \
42613b0228fSDavid Hildenbrand \
42713b0228fSDavid Hildenbrand vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \
42813b0228fSDavid Hildenbrand }
42913b0228fSDavid Hildenbrand DEF_VSTRC_HELPER(8)
43013b0228fSDavid Hildenbrand DEF_VSTRC_HELPER(16)
43113b0228fSDavid Hildenbrand DEF_VSTRC_HELPER(32)
43213b0228fSDavid Hildenbrand
43313b0228fSDavid Hildenbrand #define DEF_VSTRC_RT_HELPER(BITS) \
43413b0228fSDavid Hildenbrand void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3, \
43513b0228fSDavid Hildenbrand const void *v4, uint32_t desc) \
43613b0228fSDavid Hildenbrand { \
43713b0228fSDavid Hildenbrand const bool in = extract32(simd_data(desc), 3, 1); \
43813b0228fSDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \
43913b0228fSDavid Hildenbrand \
44013b0228fSDavid Hildenbrand vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \
44113b0228fSDavid Hildenbrand }
44213b0228fSDavid Hildenbrand DEF_VSTRC_RT_HELPER(8)
44313b0228fSDavid Hildenbrand DEF_VSTRC_RT_HELPER(16)
44413b0228fSDavid Hildenbrand DEF_VSTRC_RT_HELPER(32)
44513b0228fSDavid Hildenbrand
44613b0228fSDavid Hildenbrand #define DEF_VSTRC_CC_HELPER(BITS) \
44713b0228fSDavid Hildenbrand void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3, \
44813b0228fSDavid Hildenbrand const void *v4, CPUS390XState *env, \
44913b0228fSDavid Hildenbrand uint32_t desc) \
45013b0228fSDavid Hildenbrand { \
45113b0228fSDavid Hildenbrand const bool in = extract32(simd_data(desc), 3, 1); \
45213b0228fSDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \
45313b0228fSDavid Hildenbrand \
45413b0228fSDavid Hildenbrand env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \
45513b0228fSDavid Hildenbrand }
45613b0228fSDavid Hildenbrand DEF_VSTRC_CC_HELPER(8)
45713b0228fSDavid Hildenbrand DEF_VSTRC_CC_HELPER(16)
45813b0228fSDavid Hildenbrand DEF_VSTRC_CC_HELPER(32)
45913b0228fSDavid Hildenbrand
46013b0228fSDavid Hildenbrand #define DEF_VSTRC_CC_RT_HELPER(BITS) \
46113b0228fSDavid Hildenbrand void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3, \
46213b0228fSDavid Hildenbrand const void *v4, CPUS390XState *env, \
46313b0228fSDavid Hildenbrand uint32_t desc) \
46413b0228fSDavid Hildenbrand { \
46513b0228fSDavid Hildenbrand const bool in = extract32(simd_data(desc), 3, 1); \
46613b0228fSDavid Hildenbrand const bool zs = extract32(simd_data(desc), 1, 1); \
46713b0228fSDavid Hildenbrand \
46813b0228fSDavid Hildenbrand env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \
46913b0228fSDavid Hildenbrand }
47013b0228fSDavid Hildenbrand DEF_VSTRC_CC_RT_HELPER(8)
47113b0228fSDavid Hildenbrand DEF_VSTRC_CC_RT_HELPER(16)
47213b0228fSDavid Hildenbrand DEF_VSTRC_CC_RT_HELPER(32)
4731d706f31SDavid Miller
/*
 * Searches the string in @v2 for the substring in @v3.
 *
 * @v1: result vector; doubleword 0 receives the byte index of the match
 *      (16 if no usable match was found), doubleword 1 is cleared.
 * @v2: string to search.
 * @v3: substring to look for.
 * @v4: byte element 7 holds the substring length in bytes.
 * @es: element size as log2 of the size in bytes.
 * @zs: if set, a zero element ends both the string and the substring.
 *
 * Returns the condition code:
 *   0 - no match, and no zero element was found in @v2 (or @zs is clear)
 *   1 - no match but a zero element was found in @v2, or the only match
 *       starts to the right of that zero element
 *   2 - full match (also used for an empty substring)
 *   3 - partial match: the compared elements matched, but the substring
 *       extends beyond the end of the vector
 */
static int vstrs(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
                 const S390Vector *v4, uint8_t es, bool zs)
{
    int substr_elen, i, j, k, cc;
    int nelem = 16 >> es;
    int str_leftmost_0;

    /* convert the substring length from bytes to elements */
    substr_elen = s390_vec_read_element8(v4, 7) >> es;

    /* If ZS, bound substr length by min(nelem, strlen(v3)). */
    if (zs) {
        substr_elen = MIN(substr_elen, nelem);
        for (i = 0; i < substr_elen; i++) {
            if (s390_vec_read_element(v3, i, es) == 0) {
                substr_elen = i;
                break;
            }
        }
    }

    if (substr_elen == 0) {
        cc = 2; /* full match for degenerate case of empty substr */
        k = 0;
        goto done;
    }

    /* If ZS, look for eos in the searched string. */
    str_leftmost_0 = nelem;
    if (zs) {
        for (k = 0; k < nelem; k++) {
            if (s390_vec_read_element(v2, k, es) == 0) {
                str_leftmost_0 = k;
                break;
            }
        }
    }

    cc = str_leftmost_0 == nelem ? 0 : 1; /* No match. */
    for (k = 0; k < nelem; k++) {
        /* compare at most up to the end of the vector */
        i = MIN(nelem, k + substr_elen);
        for (j = k; j < i; j++) {
            uint32_t e2 = s390_vec_read_element(v2, j, es);
            uint32_t e3 = s390_vec_read_element(v3, j - k, es);
            if (e2 != e3) {
                break;
            }
        }
        if (j == i) {
            /* All elements matched. */
            if (k > str_leftmost_0) {
                cc = 1; /* Ignored match. */
                k = nelem;
            } else if (i - k == substr_elen) {
                cc = 2; /* Full match. */
            } else {
                cc = 3; /* Partial match. */
            }
            break;
        }
    }

done:
    /* convert the element index back into a byte index */
    s390_vec_write_element64(v1, 0, k << es);
    s390_vec_write_element64(v1, 1, 0);
    return cc;
}

5411d706f31SDavid Miller #define DEF_VSTRS_HELPER(BITS) \
5421d706f31SDavid Miller void QEMU_FLATTEN HELPER(gvec_vstrs_##BITS)(void *v1, const void *v2, \
5431d706f31SDavid Miller const void *v3, const void *v4, CPUS390XState *env, uint32_t desc) \
5441d706f31SDavid Miller { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, false); } \
5451d706f31SDavid Miller void QEMU_FLATTEN HELPER(gvec_vstrs_zs##BITS)(void *v1, const void *v2, \
5461d706f31SDavid Miller const void *v3, const void *v4, CPUS390XState *env, uint32_t desc) \
5471d706f31SDavid Miller { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, true); }
5481d706f31SDavid Miller
5491d706f31SDavid Miller DEF_VSTRS_HELPER(8)
5501d706f31SDavid Miller DEF_VSTRS_HELPER(16)
5511d706f31SDavid Miller DEF_VSTRS_HELPER(32)
552