xref: /qemu/target/s390x/tcg/vec_string_helper.c (revision 8c0e1e58ce45ab1317bed817a9821b0286f926a2)
11fd28638SDavid Hildenbrand /*
21fd28638SDavid Hildenbrand  * QEMU TCG support -- s390x vector string instruction support
31fd28638SDavid Hildenbrand  *
41fd28638SDavid Hildenbrand  * Copyright (C) 2019 Red Hat Inc
51fd28638SDavid Hildenbrand  *
61fd28638SDavid Hildenbrand  * Authors:
71fd28638SDavid Hildenbrand  *   David Hildenbrand <david@redhat.com>
81fd28638SDavid Hildenbrand  *
91fd28638SDavid Hildenbrand  * This work is licensed under the terms of the GNU GPL, version 2 or later.
101fd28638SDavid Hildenbrand  * See the COPYING file in the top-level directory.
111fd28638SDavid Hildenbrand  */
121fd28638SDavid Hildenbrand #include "qemu/osdep.h"
131fd28638SDavid Hildenbrand #include "qemu-common.h"
141fd28638SDavid Hildenbrand #include "cpu.h"
151fd28638SDavid Hildenbrand #include "internal.h"
161fd28638SDavid Hildenbrand #include "vec.h"
171fd28638SDavid Hildenbrand #include "tcg/tcg.h"
181fd28638SDavid Hildenbrand #include "tcg/tcg-gvec-desc.h"
191fd28638SDavid Hildenbrand #include "exec/helper-proto.h"
201fd28638SDavid Hildenbrand 
/*
 * Flags all elements of @a that are zero.
 *
 * @mask selects the bits of every element except the element's MSB (the
 * callers in this file pass the result of get_element_lsbs_mask()). In the
 * returned value, the MSB of an element is set if that element of @a is
 * zero; all other bits are cleared.
 */
static inline uint64_t zero_search(uint64_t a, uint64_t mask)
{
    /* the addition carries into the MSB if any lower bit of an element is set */
    const uint64_t low_bits_nonzero = (a & mask) + mask;
    /* fold in the element's own MSB and force all non-MSB bits to one */
    const uint64_t nonzero = low_bits_nonzero | a | mask;

    /* after inverting, only the MSBs of all-zero elements remain set */
    return ~nonzero;
}
291fd28638SDavid Hildenbrand 
/*
 * Translates the match bits of both doublewords (@c0 first, then @c1) into
 * the byte offset of the first match. Without any match, 16 is returned
 * (this relies on clz64() yielding 64 for an input of zero).
 */
static inline int match_index(uint64_t c0, uint64_t c1)
{
    int leading_bits;

    if (c0) {
        leading_bits = clz64(c0);
    } else {
        /* nothing flagged in the first doubleword, go on with the second */
        leading_bits = clz64(c1) + 64;
    }

    /* bit offset -> byte offset */
    return leading_bits >> 3;
}
371fd28638SDavid Hildenbrand 
381fd28638SDavid Hildenbrand /*
391fd28638SDavid Hildenbrand  * Returns the number of bits composing one element.
401fd28638SDavid Hildenbrand  */
411fd28638SDavid Hildenbrand static uint8_t get_element_bits(uint8_t es)
421fd28638SDavid Hildenbrand {
431fd28638SDavid Hildenbrand     return (1 << es) * BITS_PER_BYTE;
441fd28638SDavid Hildenbrand }
451fd28638SDavid Hildenbrand 
/*
 * Returns a mask with all bits of a single (the rightmost) element set.
 */
static uint64_t get_single_element_mask(uint8_t es)
{
    /* number of bits in a doubleword that are not part of the element */
    const int unused_bits = 64 - get_element_bits(es);

    return ~0ull >> unused_bits;
}
531fd28638SDavid Hildenbrand 
/*
 * Returns a mask with all bits of a single (the rightmost) element set,
 * except for the MSB of that element.
 */
static uint64_t get_single_element_lsbs_mask(uint8_t es)
{
    /* shift one bit further than for the full element to drop its MSB */
    const int dropped_bits = 65 - get_element_bits(es);

    return ~0ull >> dropped_bits;
}
611fd28638SDavid Hildenbrand 
/*
 * Returns the single-element mask without the MSB, replicated via
 * dup_const() for the given element size.
 */
static uint64_t get_element_lsbs_mask(uint8_t es)
{
    const uint64_t one_element = get_single_element_lsbs_mask(es);

    return dup_const(es, one_element);
}
691fd28638SDavid Hildenbrand 
/*
 * Common worker for the gvec_vfae* helpers (VECTOR FIND ANY ELEMENT EQUAL).
 *
 * Every element of @v2 is compared for equality with every element of @v3.
 *
 * @in: invert the per-element comparison result
 * @rt: store a per-element result mask in @v1 instead of a byte index
 * @zs: additionally search @v2 for zero elements
 * @es: element size (log2 of the size in bytes)
 *
 * Without @rt, the smaller one of "byte index of the first match" and "byte
 * index of the first zero element" is stored to doubleword 0 of @v1 and
 * doubleword 1 is cleared. An index of 16 means "not found".
 *
 * Returns the condition code:
 *   0 - zero element found, no match in front of it
 *   1 - match found, no zero element
 *   2 - match found in front of the zero element
 *   3 - neither a match nor a zero element
 */
static int vfae(void *v1, const void *v2, const void *v3, bool in,
                bool rt, bool zs, uint8_t es)
{
    const int elem_bits = get_element_bits(es);
    const uint64_t lsbs = get_element_lsbs_mask(es);
    /* read all inputs up front, before anything is written to v1 */
    const uint64_t a_d0 = s390_vec_read_element64(v2, 0);
    const uint64_t a_d1 = s390_vec_read_element64(v2, 1);
    const uint64_t b_d0 = s390_vec_read_element64(v3, 0);
    const uint64_t b_d1 = s390_vec_read_element64(v3, 1);
    uint64_t eq_d0 = 0;
    uint64_t eq_d1 = 0;
    uint64_t zero_idx = 16;
    uint64_t equal_idx;
    int rot;

    /*
     * Rotate the second operand element by element, so every element of it
     * ends up at every element position once. An XOR result of zero at some
     * position means that the two elements are equal.
     */
    for (rot = 0; rot < 64; rot += elem_bits) {
        const uint64_t r0 = rol64(b_d0, rot);
        const uint64_t r1 = rol64(b_d1, rot);

        eq_d0 |= zero_search(a_d0 ^ r0, lsbs) | zero_search(a_d0 ^ r1, lsbs);
        eq_d1 |= zero_search(a_d1 ^ r0, lsbs) | zero_search(a_d1 ^ r1, lsbs);
    }

    if (in) {
        /* only the MSBs carry the result, so only these get inverted */
        eq_d0 = ~(eq_d0 | lsbs);
        eq_d1 = ~(eq_d1 | lsbs);
    }
    equal_idx = match_index(eq_d0, eq_d1);

    if (zs) {
        zero_idx = match_index(zero_search(a_d0, lsbs),
                               zero_search(a_d1, lsbs));
    }

    if (rt) {
        const uint64_t elem_mask = get_single_element_mask(es);

        /* move each MSB to the LSB of its element and widen it to a mask */
        s390_vec_write_element64(v1, 0, (eq_d0 >> (elem_bits - 1)) * elem_mask);
        s390_vec_write_element64(v1, 1, (eq_d1 >> (elem_bits - 1)) * elem_mask);
    } else {
        s390_vec_write_element64(v1, 0, MIN(equal_idx, zero_idx));
        s390_vec_write_element64(v1, 1, 0);
    }

    if (zero_idx == 16) {
        /* no zero element: either no match at all or matching elements only */
        return equal_idx == 16 ? 3 : 1;
    }
    /* zero element found: did an element match in front of it? */
    return equal_idx < zero_idx ? 2 : 0;
}
1271fd28638SDavid Hildenbrand 
1281fd28638SDavid Hildenbrand #define DEF_VFAE_HELPER(BITS)                                                  \
1291fd28638SDavid Hildenbrand void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3,         \
1301fd28638SDavid Hildenbrand                              uint32_t desc)                                    \
1311fd28638SDavid Hildenbrand {                                                                              \
1321fd28638SDavid Hildenbrand     const bool in = extract32(simd_data(desc), 3, 1);                          \
1331fd28638SDavid Hildenbrand     const bool rt = extract32(simd_data(desc), 2, 1);                          \
1341fd28638SDavid Hildenbrand     const bool zs = extract32(simd_data(desc), 1, 1);                          \
1351fd28638SDavid Hildenbrand                                                                                \
1361fd28638SDavid Hildenbrand     vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                                   \
1371fd28638SDavid Hildenbrand }
1381fd28638SDavid Hildenbrand DEF_VFAE_HELPER(8)
1391fd28638SDavid Hildenbrand DEF_VFAE_HELPER(16)
1401fd28638SDavid Hildenbrand DEF_VFAE_HELPER(32)
1411fd28638SDavid Hildenbrand 
1421fd28638SDavid Hildenbrand #define DEF_VFAE_CC_HELPER(BITS)                                               \
1431fd28638SDavid Hildenbrand void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3,      \
1441fd28638SDavid Hildenbrand                                 CPUS390XState *env, uint32_t desc)             \
1451fd28638SDavid Hildenbrand {                                                                              \
1461fd28638SDavid Hildenbrand     const bool in = extract32(simd_data(desc), 3, 1);                          \
1471fd28638SDavid Hildenbrand     const bool rt = extract32(simd_data(desc), 2, 1);                          \
1481fd28638SDavid Hildenbrand     const bool zs = extract32(simd_data(desc), 1, 1);                          \
1491fd28638SDavid Hildenbrand                                                                                \
1501fd28638SDavid Hildenbrand     env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                      \
1511fd28638SDavid Hildenbrand }
1521fd28638SDavid Hildenbrand DEF_VFAE_CC_HELPER(8)
1531fd28638SDavid Hildenbrand DEF_VFAE_CC_HELPER(16)
1541fd28638SDavid Hildenbrand DEF_VFAE_CC_HELPER(32)
155*8c0e1e58SDavid Hildenbrand 
/*
 * Common worker for the gvec_vfee* helpers (VECTOR FIND ELEMENT EQUAL).
 *
 * Each element of @v2 is compared for equality with the element of @v3 at
 * the same position.
 *
 * @zs: additionally search @v2 for zero elements
 * @es: element size (log2 of the size in bytes)
 *
 * The smaller one of "byte index of the first equal element" and "byte
 * index of the first zero element" is stored to doubleword 0 of @v1 and
 * doubleword 1 is cleared. An index of 16 means "not found".
 *
 * Returns the condition code:
 *   0 - zero element found, no equal element in front of it
 *   1 - equal element found, no zero element
 *   2 - equal element found in front of the zero element
 *   3 - neither an equal nor a zero element
 */
static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t lsbs = get_element_lsbs_mask(es);
    /* read all inputs up front, before anything is written to v1 */
    const uint64_t a_d0 = s390_vec_read_element64(v2, 0);
    const uint64_t a_d1 = s390_vec_read_element64(v2, 1);
    const uint64_t b_d0 = s390_vec_read_element64(v3, 0);
    const uint64_t b_d1 = s390_vec_read_element64(v3, 1);
    /* an XOR result of zero means that both elements are equal */
    const uint64_t equal_idx = match_index(zero_search(a_d0 ^ b_d0, lsbs),
                                           zero_search(a_d1 ^ b_d1, lsbs));
    uint64_t zero_idx = 16;

    if (zs) {
        zero_idx = match_index(zero_search(a_d0, lsbs),
                               zero_search(a_d1, lsbs));
    }

    s390_vec_write_element64(v1, 0, MIN(equal_idx, zero_idx));
    s390_vec_write_element64(v1, 1, 0);

    if (zero_idx == 16) {
        /* no zero element: either no match at all or equal elements only */
        return equal_idx == 16 ? 3 : 1;
    }
    /* zero element found: is there an equal element in front of it? */
    return equal_idx < zero_idx ? 2 : 0;
}
188*8c0e1e58SDavid Hildenbrand 
189*8c0e1e58SDavid Hildenbrand #define DEF_VFEE_HELPER(BITS)                                                  \
190*8c0e1e58SDavid Hildenbrand void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3,         \
191*8c0e1e58SDavid Hildenbrand                              uint32_t desc)                                    \
192*8c0e1e58SDavid Hildenbrand {                                                                              \
193*8c0e1e58SDavid Hildenbrand     const bool zs = extract32(simd_data(desc), 1, 1);                          \
194*8c0e1e58SDavid Hildenbrand                                                                                \
195*8c0e1e58SDavid Hildenbrand     vfee(v1, v2, v3, zs, MO_##BITS);                                           \
196*8c0e1e58SDavid Hildenbrand }
197*8c0e1e58SDavid Hildenbrand DEF_VFEE_HELPER(8)
198*8c0e1e58SDavid Hildenbrand DEF_VFEE_HELPER(16)
199*8c0e1e58SDavid Hildenbrand DEF_VFEE_HELPER(32)
200*8c0e1e58SDavid Hildenbrand 
201*8c0e1e58SDavid Hildenbrand #define DEF_VFEE_CC_HELPER(BITS)                                               \
202*8c0e1e58SDavid Hildenbrand void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3,      \
203*8c0e1e58SDavid Hildenbrand                                 CPUS390XState *env, uint32_t desc)             \
204*8c0e1e58SDavid Hildenbrand {                                                                              \
205*8c0e1e58SDavid Hildenbrand     const bool zs = extract32(simd_data(desc), 1, 1);                          \
206*8c0e1e58SDavid Hildenbrand                                                                                \
207*8c0e1e58SDavid Hildenbrand     env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS);                              \
208*8c0e1e58SDavid Hildenbrand }
209*8c0e1e58SDavid Hildenbrand DEF_VFEE_CC_HELPER(8)
210*8c0e1e58SDavid Hildenbrand DEF_VFEE_CC_HELPER(16)
211*8c0e1e58SDavid Hildenbrand DEF_VFEE_CC_HELPER(32)
212