xref: /qemu/target/s390x/tcg/vec_string_helper.c (revision 791b2b6a930273db694b9ba48bbb406e78715927)
/*
 * QEMU TCG support -- s390x vector string instruction support
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
121fd28638SDavid Hildenbrand #include "qemu/osdep.h"
131fd28638SDavid Hildenbrand #include "cpu.h"
14b6b47223SCho, Yu-Chen #include "s390x-internal.h"
151fd28638SDavid Hildenbrand #include "vec.h"
161fd28638SDavid Hildenbrand #include "tcg/tcg.h"
171fd28638SDavid Hildenbrand #include "tcg/tcg-gvec-desc.h"
181fd28638SDavid Hildenbrand #include "exec/helper-proto.h"
191fd28638SDavid Hildenbrand 
/*
 * Returns a bit set in the MSB of each element that is zero,
 * as defined by the mask.
 *
 * @a:    64 bits worth of packed elements.
 * @mask: all bits of every element except the element's MSB, as produced
 *        by get_element_lsbs_mask().
 *
 * (a & mask) + mask carries into an element's MSB iff any of its lower
 * bits is set; ORing in @a additionally covers elements whose MSB itself
 * is set. ORing in @mask forces all non-MSB bits to one, so that after the
 * final inversion only the MSBs of all-zero elements remain set.
 */
static inline uint64_t zero_search(uint64_t a, uint64_t mask)
{
    return ~(((a & mask) + mask) | a | mask);
}
281fd28638SDavid Hildenbrand 
/*
 * Returns a bit set in the MSB of each element that is not zero,
 * as defined by the mask.
 *
 * @a:    64 bits worth of packed elements.
 * @mask: all bits of every element except the element's MSB, as produced
 *        by get_element_lsbs_mask().
 *
 * (a & mask) + mask carries into an element's MSB iff any of its lower
 * bits is set; ORing in @a covers elements whose MSB itself is set.
 * Masking with ~mask then keeps only the per-element MSB markers.
 */
static inline uint64_t nonzero_search(uint64_t a, uint64_t mask)
{
    return (((a & mask) + mask) | a) & ~mask;
}
37074e99b3SDavid Hildenbrand 
/*
 * Returns the byte offset for the first match, or 16 for no match.
 *
 * @c0: per-element MSB markers for doubleword 0 of the vector.
 * @c1: per-element MSB markers for doubleword 1 of the vector.
 *
 * The number of leading zero bits of the first non-zero marker word is the
 * bit position of the leftmost marker; shifting right by 3 converts it to
 * a byte offset. The "no match" value of 16 relies on clz64() yielding 64
 * for a zero argument, so that (64 + 64) >> 3 == 16.
 */
static inline int match_index(uint64_t c0, uint64_t c1)
{
    return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3;
}
451fd28638SDavid Hildenbrand 
461fd28638SDavid Hildenbrand /*
471fd28638SDavid Hildenbrand  * Returns the number of bits composing one element.
481fd28638SDavid Hildenbrand  */
get_element_bits(uint8_t es)491fd28638SDavid Hildenbrand static uint8_t get_element_bits(uint8_t es)
501fd28638SDavid Hildenbrand {
511fd28638SDavid Hildenbrand     return (1 << es) * BITS_PER_BYTE;
521fd28638SDavid Hildenbrand }
531fd28638SDavid Hildenbrand 
/*
 * Returns the bitmask for a single element.
 *
 * The mask is right-aligned: the get_element_bits(es) least significant
 * bits of the returned value are set, all other bits are clear.
 */
static uint64_t get_single_element_mask(uint8_t es)
{
    return -1ull >> (64 - get_element_bits(es));
}
611fd28638SDavid Hildenbrand 
/*
 * Returns the bitmask for a single element (excluding the MSB).
 *
 * The mask is right-aligned and one bit narrower than the element:
 * the get_element_bits(es) - 1 least significant bits are set.
 */
static uint64_t get_single_element_lsbs_mask(uint8_t es)
{
    return -1ull >> (65 - get_element_bits(es));
}
691fd28638SDavid Hildenbrand 
/*
 * Returns the bitmasks for multiple elements (excluding the MSBs).
 *
 * The single-element pattern from get_single_element_lsbs_mask() is
 * handed to dup_const() to obtain a 64-bit mask covering all elements of
 * size @es; this is the mask zero_search()/nonzero_search() operate on.
 */
static uint64_t get_element_lsbs_mask(uint8_t es)
{
    return dup_const(es, get_single_element_lsbs_mask(es));
}
771fd28638SDavid Hildenbrand 
/*
 * VECTOR FIND ANY ELEMENT EQUAL: flag every element of @v2 that is equal
 * to any element of @v3.
 *
 * @v1: result vector.
 * @v2: vector whose elements are searched (and, with @zs, scanned for zero).
 * @v3: vector holding the elements to compare against.
 * @in: invert the per-element comparison result.
 * @rt: if set, write an element mask (all ones for flagged elements, zero
 *      otherwise) to @v1; if clear, write the byte offset of the first
 *      flagged or zero element to doubleword 0 of @v1 and zero to
 *      doubleword 1.
 * @zs: additionally search @v2 for the first zero element.
 * @es: element size as a log2 byte count.
 *
 * Returns the condition code:
 *   0 - a zero element was found, not preceded by a flagged element
 *   1 - flagged element(s) found, no zero element
 *   2 - a flagged element was found before the zero element
 *   3 - neither a flagged nor a zero element was found
 */
static int vfae(void *v1, const void *v2, const void *v3, bool in,
                bool rt, bool zs, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    const int bits = get_element_bits(es);
    uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1;
    uint64_t first_zero = 16;
    uint64_t first_equal;
    int i;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);
    e0 = 0;
    e1 = 0;
    /*
     * compare against equality with every other element: rotating b0/b1 by
     * one element per iteration lines up each element of v3 with each
     * element position of v2; equal elements XOR to zero.
     */
    for (i = 0; i < 64; i += bits) {
        t0 = rol64(b0, i);
        t1 = rol64(b1, i);
        e0 |= zero_search(a0 ^ t0, mask);
        e0 |= zero_search(a0 ^ t1, mask);
        e1 |= zero_search(a1 ^ t0, mask);
        e1 |= zero_search(a1 ^ t1, mask);
    }
    /* invert the result if requested - invert only the MSBs */
    if (in) {
        e0 = ~e0 & ~mask;
        e1 = ~e1 & ~mask;
    }
    first_equal = match_index(e0, e1);

    if (zs) {
        z0 = zero_search(a0, mask);
        z1 = zero_search(a1, mask);
        first_zero = match_index(z0, z1);
    }

    if (rt) {
        /*
         * Move each MSB marker to the LSB of its element, then multiply by
         * the single-element mask to widen it to an all-ones element.
         */
        e0 = (e0 >> (bits - 1)) * get_single_element_mask(es);
        e1 = (e1 >> (bits - 1)) * get_single_element_mask(es);
        s390_vec_write_element64(v1, 0, e0);
        s390_vec_write_element64(v1, 1, e1);
    } else {
        s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
        s390_vec_write_element64(v1, 1, 0);
    }

    if (first_zero == 16 && first_equal == 16) {
        return 3; /* no match */
    } else if (first_zero == 16) {
        return 1; /* matching elements, no match for zero */
    } else if (first_equal < first_zero) {
        return 2; /* matching elements before match for zero */
    }
    return 0; /* match for zero */
}
1351fd28638SDavid Hildenbrand 
1361fd28638SDavid Hildenbrand #define DEF_VFAE_HELPER(BITS)                                                  \
1371fd28638SDavid Hildenbrand void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3,         \
1381fd28638SDavid Hildenbrand                              uint32_t desc)                                    \
1391fd28638SDavid Hildenbrand {                                                                              \
1401fd28638SDavid Hildenbrand     const bool in = extract32(simd_data(desc), 3, 1);                          \
1411fd28638SDavid Hildenbrand     const bool rt = extract32(simd_data(desc), 2, 1);                          \
1421fd28638SDavid Hildenbrand     const bool zs = extract32(simd_data(desc), 1, 1);                          \
1431fd28638SDavid Hildenbrand                                                                                \
1441fd28638SDavid Hildenbrand     vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                                   \
1451fd28638SDavid Hildenbrand }
1461fd28638SDavid Hildenbrand DEF_VFAE_HELPER(8)
1471fd28638SDavid Hildenbrand DEF_VFAE_HELPER(16)
1481fd28638SDavid Hildenbrand DEF_VFAE_HELPER(32)
1491fd28638SDavid Hildenbrand 
1501fd28638SDavid Hildenbrand #define DEF_VFAE_CC_HELPER(BITS)                                               \
1511fd28638SDavid Hildenbrand void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3,      \
1521fd28638SDavid Hildenbrand                                 CPUS390XState *env, uint32_t desc)             \
1531fd28638SDavid Hildenbrand {                                                                              \
1541fd28638SDavid Hildenbrand     const bool in = extract32(simd_data(desc), 3, 1);                          \
1551fd28638SDavid Hildenbrand     const bool rt = extract32(simd_data(desc), 2, 1);                          \
1561fd28638SDavid Hildenbrand     const bool zs = extract32(simd_data(desc), 1, 1);                          \
1571fd28638SDavid Hildenbrand                                                                                \
1581fd28638SDavid Hildenbrand     env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                      \
1591fd28638SDavid Hildenbrand }
1601fd28638SDavid Hildenbrand DEF_VFAE_CC_HELPER(8)
1611fd28638SDavid Hildenbrand DEF_VFAE_CC_HELPER(16)
1621fd28638SDavid Hildenbrand DEF_VFAE_CC_HELPER(32)
1638c0e1e58SDavid Hildenbrand 
/*
 * VECTOR FIND ELEMENT EQUAL: find the first element position at which
 * @v2 and @v3 hold equal elements.
 *
 * @v1: result vector; doubleword 0 receives the byte offset of the first
 *      equal or zero element (16 if there is none), doubleword 1 is zeroed.
 * @v2: first vector to compare (and, with @zs, scanned for a zero element).
 * @v3: second vector to compare.
 * @zs: additionally search @v2 for the first zero element.
 * @es: element size as a log2 byte count.
 *
 * Returns the condition code:
 *   0 - a zero element was found, not preceded by an equal element
 *   1 - equal element(s) found, no zero element
 *   2 - an equal element was found before the zero element
 *   3 - neither an equal nor a zero element was found
 */
static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
    uint64_t first_zero = 16;
    uint64_t first_equal;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);
    /* equal elements XOR to zero */
    e0 = zero_search(a0 ^ b0, mask);
    e1 = zero_search(a1 ^ b1, mask);
    first_equal = match_index(e0, e1);

    if (zs) {
        z0 = zero_search(a0, mask);
        z1 = zero_search(a1, mask);
        first_zero = match_index(z0, z1);
    }

    s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
    s390_vec_write_element64(v1, 1, 0);
    if (first_zero == 16 && first_equal == 16) {
        return 3; /* no match */
    } else if (first_zero == 16) {
        return 1; /* matching elements, no match for zero */
    } else if (first_equal < first_zero) {
        return 2; /* matching elements before match for zero */
    }
    return 0; /* match for zero */
}
1968c0e1e58SDavid Hildenbrand 
1978c0e1e58SDavid Hildenbrand #define DEF_VFEE_HELPER(BITS)                                                  \
1988c0e1e58SDavid Hildenbrand void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3,         \
1998c0e1e58SDavid Hildenbrand                              uint32_t desc)                                    \
2008c0e1e58SDavid Hildenbrand {                                                                              \
2018c0e1e58SDavid Hildenbrand     const bool zs = extract32(simd_data(desc), 1, 1);                          \
2028c0e1e58SDavid Hildenbrand                                                                                \
2038c0e1e58SDavid Hildenbrand     vfee(v1, v2, v3, zs, MO_##BITS);                                           \
2048c0e1e58SDavid Hildenbrand }
2058c0e1e58SDavid Hildenbrand DEF_VFEE_HELPER(8)
2068c0e1e58SDavid Hildenbrand DEF_VFEE_HELPER(16)
2078c0e1e58SDavid Hildenbrand DEF_VFEE_HELPER(32)
2088c0e1e58SDavid Hildenbrand 
2098c0e1e58SDavid Hildenbrand #define DEF_VFEE_CC_HELPER(BITS)                                               \
2108c0e1e58SDavid Hildenbrand void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3,      \
2118c0e1e58SDavid Hildenbrand                                 CPUS390XState *env, uint32_t desc)             \
2128c0e1e58SDavid Hildenbrand {                                                                              \
2138c0e1e58SDavid Hildenbrand     const bool zs = extract32(simd_data(desc), 1, 1);                          \
2148c0e1e58SDavid Hildenbrand                                                                                \
2158c0e1e58SDavid Hildenbrand     env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS);                              \
2168c0e1e58SDavid Hildenbrand }
2178c0e1e58SDavid Hildenbrand DEF_VFEE_CC_HELPER(8)
2188c0e1e58SDavid Hildenbrand DEF_VFEE_CC_HELPER(16)
2198c0e1e58SDavid Hildenbrand DEF_VFEE_CC_HELPER(32)
220074e99b3SDavid Hildenbrand 
/*
 * VECTOR FIND ELEMENT NOT EQUAL: find the first element position at which
 * @v2 and @v3 hold different elements.
 *
 * @v1: result vector; doubleword 0 receives the byte offset of the first
 *      unequal or zero element (16 if there is none), doubleword 1 is
 *      zeroed.
 * @v2: first vector to compare (and, with @zs, scanned for a zero element).
 * @v3: second vector to compare.
 * @zs: additionally search @v2 for the first zero element.
 * @es: element size as a log2 byte count.
 *
 * Returns the condition code:
 *   0 - a zero element was found before any unequal element
 *   1 - at the first unequal position, the @v2 element is smaller
 *       (unsigned) than the @v3 element
 *   2 - at the first unequal position, the @v2 element is not smaller
 *   3 - neither an unequal nor a zero element was found
 */
static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
    uint64_t first_zero = 16;
    uint64_t first_inequal;
    bool smaller = false;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);
    /* differing elements XOR to a non-zero value */
    e0 = nonzero_search(a0 ^ b0, mask);
    e1 = nonzero_search(a1 ^ b1, mask);
    first_inequal = match_index(e0, e1);

    /* identify the smaller element */
    if (first_inequal < 16) {
        /* convert the byte offset into an element number */
        uint8_t enr = first_inequal / (1 << es);
        uint32_t a = s390_vec_read_element(v2, enr, es);
        uint32_t b = s390_vec_read_element(v3, enr, es);

        smaller = a < b;
    }

    if (zs) {
        z0 = zero_search(a0, mask);
        z1 = zero_search(a1, mask);
        first_zero = match_index(z0, z1);
    }

    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
    s390_vec_write_element64(v1, 1, 0);
    if (first_zero == 16 && first_inequal == 16) {
        return 3;
    } else if (first_zero < first_inequal) {
        return 0;
    }
    return smaller ? 1 : 2;
}
261074e99b3SDavid Hildenbrand 
262074e99b3SDavid Hildenbrand #define DEF_VFENE_HELPER(BITS)                                                 \
263074e99b3SDavid Hildenbrand void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3,        \
264074e99b3SDavid Hildenbrand                               uint32_t desc)                                   \
265074e99b3SDavid Hildenbrand {                                                                              \
266074e99b3SDavid Hildenbrand     const bool zs = extract32(simd_data(desc), 1, 1);                          \
267074e99b3SDavid Hildenbrand                                                                                \
268074e99b3SDavid Hildenbrand     vfene(v1, v2, v3, zs, MO_##BITS);                                          \
269074e99b3SDavid Hildenbrand }
270074e99b3SDavid Hildenbrand DEF_VFENE_HELPER(8)
271074e99b3SDavid Hildenbrand DEF_VFENE_HELPER(16)
272074e99b3SDavid Hildenbrand DEF_VFENE_HELPER(32)
273074e99b3SDavid Hildenbrand 
274074e99b3SDavid Hildenbrand #define DEF_VFENE_CC_HELPER(BITS)                                              \
275074e99b3SDavid Hildenbrand void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3,     \
276074e99b3SDavid Hildenbrand                                  CPUS390XState *env, uint32_t desc)            \
277074e99b3SDavid Hildenbrand {                                                                              \
278074e99b3SDavid Hildenbrand     const bool zs = extract32(simd_data(desc), 1, 1);                          \
279074e99b3SDavid Hildenbrand                                                                                \
280074e99b3SDavid Hildenbrand     env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS);                             \
281074e99b3SDavid Hildenbrand }
282074e99b3SDavid Hildenbrand DEF_VFENE_CC_HELPER(8)
283074e99b3SDavid Hildenbrand DEF_VFENE_CC_HELPER(16)
284074e99b3SDavid Hildenbrand DEF_VFENE_CC_HELPER(32)
285be6324c6SDavid Hildenbrand 
/*
 * VECTOR ISOLATE STRING: copy @v2 to @v1 up to the leftmost zero element
 * and clear everything from that zero element to the end of the vector.
 *
 * @v1: result vector.
 * @v2: source vector.
 * @es: element size as a log2 byte count.
 *
 * Returns condition code 0 if a zero element was found, 3 otherwise (in
 * which case @v1 receives an unmodified copy of @v2).
 */
static int vistr(void *v1, const void *v2, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    uint64_t a0 = s390_vec_read_element64(v2, 0);
    uint64_t a1 = s390_vec_read_element64(v2, 1);
    uint64_t z;
    int cc = 3;

    z = zero_search(a0, mask);
    if (z) {
        /*
         * clz64(z) is the bit position of the leftmost zero element's MSB;
         * clear that bit and every bit to the right of it.
         */
        a0 &= ~(-1ull >> clz64(z));
        a1 = 0;
        cc = 0;
    } else {
        z = zero_search(a1, mask);
        if (z) {
            a1 &= ~(-1ull >> clz64(z));
            cc = 0;
        }
    }

    s390_vec_write_element64(v1, 0, a0);
    s390_vec_write_element64(v1, 1, a1);
    return cc;
}
311be6324c6SDavid Hildenbrand 
312be6324c6SDavid Hildenbrand #define DEF_VISTR_HELPER(BITS)                                                 \
313be6324c6SDavid Hildenbrand void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc)         \
314be6324c6SDavid Hildenbrand {                                                                              \
315be6324c6SDavid Hildenbrand     vistr(v1, v2, MO_##BITS);                                                  \
316be6324c6SDavid Hildenbrand }
317be6324c6SDavid Hildenbrand DEF_VISTR_HELPER(8)
318be6324c6SDavid Hildenbrand DEF_VISTR_HELPER(16)
319be6324c6SDavid Hildenbrand DEF_VISTR_HELPER(32)
320be6324c6SDavid Hildenbrand 
321be6324c6SDavid Hildenbrand #define DEF_VISTR_CC_HELPER(BITS)                                              \
322be6324c6SDavid Hildenbrand void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
323be6324c6SDavid Hildenbrand                                 uint32_t desc)                                 \
324be6324c6SDavid Hildenbrand {                                                                              \
325be6324c6SDavid Hildenbrand     env->cc_op = vistr(v1, v2, MO_##BITS);                                     \
326be6324c6SDavid Hildenbrand }
327be6324c6SDavid Hildenbrand DEF_VISTR_CC_HELPER(8)
328be6324c6SDavid Hildenbrand DEF_VISTR_CC_HELPER(16)
329be6324c6SDavid Hildenbrand DEF_VISTR_CC_HELPER(32)
33013b0228fSDavid Hildenbrand 
/*
 * Evaluates a single range-compare condition.
 *
 * @data: element value being tested.
 * @l:    value to compare @data against (unsigned comparison).
 * @c:    control byte selecting which comparison outcomes count as a hit:
 *        bit 7 (0x80) - @data == @l
 *        bit 6 (0x40) - @data <  @l
 *        bit 5 (0x20) - @data >  @l
 *
 * Returns the control bit that corresponds to the actual relation between
 * @data and @l.
 */
static bool element_compare(uint32_t data, uint32_t l, uint8_t c)
{
    const bool equal = extract32(c, 7, 1);
    const bool lower = extract32(c, 6, 1);
    const bool higher = extract32(c, 5, 1);

    if (data < l) {
        return lower;
    } else if (data > l) {
        return higher;
    }
    return equal;
}
34413b0228fSDavid Hildenbrand 
vstrc(void * v1,const void * v2,const void * v3,const void * v4,bool in,bool rt,bool zs,uint8_t es)34513b0228fSDavid Hildenbrand static int vstrc(void *v1, const void *v2, const void *v3, const void *v4,
34613b0228fSDavid Hildenbrand                  bool in, bool rt, bool zs, uint8_t es)
34713b0228fSDavid Hildenbrand {
34813b0228fSDavid Hildenbrand     const uint64_t mask = get_element_lsbs_mask(es);
34913b0228fSDavid Hildenbrand     uint64_t a0 = s390_vec_read_element64(v2, 0);
35013b0228fSDavid Hildenbrand     uint64_t a1 = s390_vec_read_element64(v2, 1);
35113b0228fSDavid Hildenbrand     int first_zero = 16, first_match = 16;
35213b0228fSDavid Hildenbrand     S390Vector rt_result = {};
35313b0228fSDavid Hildenbrand     uint64_t z0, z1;
35413b0228fSDavid Hildenbrand     int i, j;
35513b0228fSDavid Hildenbrand 
35613b0228fSDavid Hildenbrand     if (zs) {
35713b0228fSDavid Hildenbrand         z0 = zero_search(a0, mask);
35813b0228fSDavid Hildenbrand         z1 = zero_search(a1, mask);
35913b0228fSDavid Hildenbrand         first_zero = match_index(z0, z1);
36013b0228fSDavid Hildenbrand     }
36113b0228fSDavid Hildenbrand 
36213b0228fSDavid Hildenbrand     for (i = 0; i < 16 / (1 << es); i++) {
36313b0228fSDavid Hildenbrand         const uint32_t data = s390_vec_read_element(v2, i, es);
36413b0228fSDavid Hildenbrand         const int cur_byte = i * (1 << es);
36513b0228fSDavid Hildenbrand         bool any_match = false;
36613b0228fSDavid Hildenbrand 
36713b0228fSDavid Hildenbrand         /* if we don't need a bit vector, we can stop early */
36813b0228fSDavid Hildenbrand         if (cur_byte == first_zero && !rt) {
36913b0228fSDavid Hildenbrand             break;
37013b0228fSDavid Hildenbrand         }
37113b0228fSDavid Hildenbrand 
37213b0228fSDavid Hildenbrand         for (j = 0; j < 16 / (1 << es); j += 2) {
37313b0228fSDavid Hildenbrand             const uint32_t l1 = s390_vec_read_element(v3, j, es);
37413b0228fSDavid Hildenbrand             const uint32_t l2 = s390_vec_read_element(v3, j + 1, es);
37513b0228fSDavid Hildenbrand             /* we are only interested in the highest byte of each element */
37613b0228fSDavid Hildenbrand             const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es));
37713b0228fSDavid Hildenbrand             const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es));
37813b0228fSDavid Hildenbrand 
37913b0228fSDavid Hildenbrand             if (element_compare(data, l1, c1) &&
38013b0228fSDavid Hildenbrand                 element_compare(data, l2, c2)) {
38113b0228fSDavid Hildenbrand                 any_match = true;
38213b0228fSDavid Hildenbrand                 break;
38313b0228fSDavid Hildenbrand             }
38413b0228fSDavid Hildenbrand         }
38513b0228fSDavid Hildenbrand         /* invert the result if requested */
38613b0228fSDavid Hildenbrand         any_match = in ^ any_match;
38713b0228fSDavid Hildenbrand 
38813b0228fSDavid Hildenbrand         if (any_match) {
38913b0228fSDavid Hildenbrand             /* indicate bit vector if requested */
39013b0228fSDavid Hildenbrand             if (rt) {
39113b0228fSDavid Hildenbrand                 const uint64_t val = -1ull;
39213b0228fSDavid Hildenbrand 
39313b0228fSDavid Hildenbrand                 first_match = MIN(cur_byte, first_match);
39413b0228fSDavid Hildenbrand                 s390_vec_write_element(&rt_result, i, es, val);
39513b0228fSDavid Hildenbrand             } else {
39613b0228fSDavid Hildenbrand                 /* stop on the first match */
39713b0228fSDavid Hildenbrand                 first_match = cur_byte;
39813b0228fSDavid Hildenbrand                 break;
39913b0228fSDavid Hildenbrand             }
40013b0228fSDavid Hildenbrand         }
40113b0228fSDavid Hildenbrand     }
40213b0228fSDavid Hildenbrand 
40313b0228fSDavid Hildenbrand     if (rt) {
40413b0228fSDavid Hildenbrand         *(S390Vector *)v1 = rt_result;
40513b0228fSDavid Hildenbrand     } else {
40613b0228fSDavid Hildenbrand         s390_vec_write_element64(v1, 0, MIN(first_match, first_zero));
40713b0228fSDavid Hildenbrand         s390_vec_write_element64(v1, 1, 0);
40813b0228fSDavid Hildenbrand     }
40913b0228fSDavid Hildenbrand 
41013b0228fSDavid Hildenbrand     if (first_zero == 16 && first_match == 16) {
41113b0228fSDavid Hildenbrand         return 3; /* no match */
41213b0228fSDavid Hildenbrand     } else if (first_zero == 16) {
41313b0228fSDavid Hildenbrand         return 1; /* matching elements, no match for zero */
41413b0228fSDavid Hildenbrand     } else if (first_match < first_zero) {
41513b0228fSDavid Hildenbrand         return 2; /* matching elements before match for zero */
41613b0228fSDavid Hildenbrand     }
41713b0228fSDavid Hildenbrand     return 0; /* match for zero */
41813b0228fSDavid Hildenbrand }
41913b0228fSDavid Hildenbrand 
42013b0228fSDavid Hildenbrand #define DEF_VSTRC_HELPER(BITS)                                                 \
42113b0228fSDavid Hildenbrand void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3,        \
42213b0228fSDavid Hildenbrand                               const void *v4, uint32_t desc)                   \
42313b0228fSDavid Hildenbrand {                                                                              \
42413b0228fSDavid Hildenbrand     const bool in = extract32(simd_data(desc), 3, 1);                          \
42513b0228fSDavid Hildenbrand     const bool zs = extract32(simd_data(desc), 1, 1);                          \
42613b0228fSDavid Hildenbrand                                                                                \
42713b0228fSDavid Hildenbrand     vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                               \
42813b0228fSDavid Hildenbrand }
42913b0228fSDavid Hildenbrand DEF_VSTRC_HELPER(8)
43013b0228fSDavid Hildenbrand DEF_VSTRC_HELPER(16)
43113b0228fSDavid Hildenbrand DEF_VSTRC_HELPER(32)
43213b0228fSDavid Hildenbrand 
43313b0228fSDavid Hildenbrand #define DEF_VSTRC_RT_HELPER(BITS)                                              \
43413b0228fSDavid Hildenbrand void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3,     \
43513b0228fSDavid Hildenbrand                                  const void *v4, uint32_t desc)                \
43613b0228fSDavid Hildenbrand {                                                                              \
43713b0228fSDavid Hildenbrand     const bool in = extract32(simd_data(desc), 3, 1);                          \
43813b0228fSDavid Hildenbrand     const bool zs = extract32(simd_data(desc), 1, 1);                          \
43913b0228fSDavid Hildenbrand                                                                                \
44013b0228fSDavid Hildenbrand     vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                               \
44113b0228fSDavid Hildenbrand }
44213b0228fSDavid Hildenbrand DEF_VSTRC_RT_HELPER(8)
44313b0228fSDavid Hildenbrand DEF_VSTRC_RT_HELPER(16)
44413b0228fSDavid Hildenbrand DEF_VSTRC_RT_HELPER(32)
44513b0228fSDavid Hildenbrand 
44613b0228fSDavid Hildenbrand #define DEF_VSTRC_CC_HELPER(BITS)                                              \
44713b0228fSDavid Hildenbrand void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3,     \
44813b0228fSDavid Hildenbrand                                  const void *v4, CPUS390XState *env,           \
44913b0228fSDavid Hildenbrand                                  uint32_t desc)                                \
45013b0228fSDavid Hildenbrand {                                                                              \
45113b0228fSDavid Hildenbrand     const bool in = extract32(simd_data(desc), 3, 1);                          \
45213b0228fSDavid Hildenbrand     const bool zs = extract32(simd_data(desc), 1, 1);                          \
45313b0228fSDavid Hildenbrand                                                                                \
45413b0228fSDavid Hildenbrand     env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                  \
45513b0228fSDavid Hildenbrand }
45613b0228fSDavid Hildenbrand DEF_VSTRC_CC_HELPER(8)
45713b0228fSDavid Hildenbrand DEF_VSTRC_CC_HELPER(16)
45813b0228fSDavid Hildenbrand DEF_VSTRC_CC_HELPER(32)
45913b0228fSDavid Hildenbrand 
46013b0228fSDavid Hildenbrand #define DEF_VSTRC_CC_RT_HELPER(BITS)                                           \
46113b0228fSDavid Hildenbrand void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3,  \
46213b0228fSDavid Hildenbrand                                     const void *v4, CPUS390XState *env,        \
46313b0228fSDavid Hildenbrand                                     uint32_t desc)                             \
46413b0228fSDavid Hildenbrand {                                                                              \
46513b0228fSDavid Hildenbrand     const bool in = extract32(simd_data(desc), 3, 1);                          \
46613b0228fSDavid Hildenbrand     const bool zs = extract32(simd_data(desc), 1, 1);                          \
46713b0228fSDavid Hildenbrand                                                                                \
46813b0228fSDavid Hildenbrand     env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                  \
46913b0228fSDavid Hildenbrand }
47013b0228fSDavid Hildenbrand DEF_VSTRC_CC_RT_HELPER(8)
47113b0228fSDavid Hildenbrand DEF_VSTRC_CC_RT_HELPER(16)
47213b0228fSDavid Hildenbrand DEF_VSTRC_CC_RT_HELPER(32)
4731d706f31SDavid Miller 
/*
 * Search the string held in @v2 for the substring held in @v3, comparing
 * elements of size @es (log2 of the element size in bytes).
 *
 * The substring length is read from byte element 7 of @v4 and shifted right
 * by @es to obtain an element count.  With @zs set, the substring is
 * additionally cut at its first zero element, and a match that starts after
 * the leftmost zero element of @v2 is ignored.
 *
 * 64-bit element 0 of @v1 receives the starting index of the match shifted
 * left by @es, or the element count shifted left by @es when there is no
 * usable match; 64-bit element 1 of @v1 is set to zero.
 *
 * Returns the condition code:
 *   0 - no match, and no zero element was found in @v2
 *   1 - no match (or only an ignored one), and @v2 contains a zero element
 *   2 - full match (also used for an empty substring, reported at index 0)
 *   3 - partial match: all compared elements agree, but the substring
 *       extends past the last element of @v2
 */
static int vstrs(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
                 const S390Vector *v4, uint8_t es, bool zs)
{
    int nr_elems = 16 >> es;
    int needle_len = s390_vec_read_element8(v4, 7) >> es;
    int first_zero = nr_elems;
    int start;
    int cc;

    if (zs) {
        int n = 0;

        /* Bound the substring by the vector size and by its first zero. */
        needle_len = MIN(needle_len, nr_elems);
        while (n < needle_len && s390_vec_read_element(v3, n, es) != 0) {
            n++;
        }
        needle_len = n;
    }

    if (needle_len == 0) {
        /* Degenerate case: an empty substring fully matches at index 0. */
        s390_vec_write_element64(v1, 0, 0);
        s390_vec_write_element64(v1, 1, 0);
        return 2;
    }

    if (zs) {
        int n = 0;

        /* Locate the leftmost zero element of the searched string. */
        while (n < nr_elems && s390_vec_read_element(v2, n, es) != 0) {
            n++;
        }
        first_zero = n;
    }

    /* Result if no candidate position matches. */
    if (first_zero == nr_elems) {
        cc = 0;
    } else {
        cc = 1;
    }

    for (start = 0; start < nr_elems; start++) {
        /* Compare up to the end of the substring or of the vector. */
        int stop = MIN(nr_elems, start + needle_len);
        int pos = start;

        while (pos < stop) {
            uint32_t hay = s390_vec_read_element(v2, pos, es);
            uint32_t ndl = s390_vec_read_element(v3, pos - start, es);

            if (hay != ndl) {
                break;
            }
            pos++;
        }
        if (pos < stop) {
            /* Mismatch at this position, try the next one. */
            continue;
        }

        /* Every compared element agreed; classify the match and stop. */
        if (start > first_zero) {
            /* Match begins after the leftmost zero of @v2: ignore it. */
            cc = 1;
            start = nr_elems;
        } else if (stop - start == needle_len) {
            cc = 2;
        } else {
            /* Comparison was cut short by the end of the vector. */
            cc = 3;
        }
        break;
    }

    /* start == nr_elems here unless a usable match was found. */
    s390_vec_write_element64(v1, 0, start << es);
    s390_vec_write_element64(v1, 1, 0);
    return cc;
}
5401d706f31SDavid Miller 
5411d706f31SDavid Miller #define DEF_VSTRS_HELPER(BITS)                                             \
5421d706f31SDavid Miller void QEMU_FLATTEN HELPER(gvec_vstrs_##BITS)(void *v1, const void *v2,      \
5431d706f31SDavid Miller     const void *v3, const void *v4, CPUS390XState *env, uint32_t desc)     \
5441d706f31SDavid Miller     { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, false); }              \
5451d706f31SDavid Miller void QEMU_FLATTEN HELPER(gvec_vstrs_zs##BITS)(void *v1, const void *v2,    \
5461d706f31SDavid Miller     const void *v3, const void *v4, CPUS390XState *env, uint32_t desc)     \
5471d706f31SDavid Miller     { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, true); }
5481d706f31SDavid Miller 
5491d706f31SDavid Miller DEF_VSTRS_HELPER(8)
5501d706f31SDavid Miller DEF_VSTRS_HELPER(16)
5511d706f31SDavid Miller DEF_VSTRS_HELPER(32)
552