xref: /qemu/target/s390x/tcg/vec_fpu_helper.c (revision e384332cb53bd5b4d813cc38b5d19b3648047e14)
1 /*
2  * QEMU TCG support -- s390x vector floating point instruction support
3  *
4  * Copyright (C) 2019 Red Hat Inc
5  *
6  * Authors:
7  *   David Hildenbrand <david@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 #include "qemu/osdep.h"
13 #include "qemu-common.h"
14 #include "cpu.h"
15 #include "internal.h"
16 #include "vec.h"
17 #include "tcg_s390x.h"
18 #include "tcg/tcg-gvec-desc.h"
19 #include "exec/exec-all.h"
20 #include "exec/helper-proto.h"
21 #include "fpu/softfloat.h"
22 
/*
 * Vector interruption codes (VIC). check_ieee_exc() places one of these in
 * the low four bits of the VXC, below the element number.
 */
enum {
    VIC_INVALID   = 0x1,
    VIC_DIVBYZERO = 0x2,
    VIC_OVERFLOW  = 0x3,
    VIC_UNDERFLOW = 0x4,
    VIC_INEXACT   = 0x5,
};
28 
29 /* returns the VEX. If the VEX is 0, there is no trap */
30 static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC,
31                               uint8_t *vec_exc)
32 {
33     uint8_t vece_exc = 0, trap_exc;
34     unsigned qemu_exc;
35 
36     /* Retrieve and clear the softfloat exceptions */
37     qemu_exc = env->fpu_status.float_exception_flags;
38     if (qemu_exc == 0) {
39         return 0;
40     }
41     env->fpu_status.float_exception_flags = 0;
42 
43     vece_exc = s390_softfloat_exc_to_ieee(qemu_exc);
44 
45     /* Add them to the vector-wide s390x exception bits */
46     *vec_exc |= vece_exc;
47 
48     /* Check for traps and construct the VXC */
49     trap_exc = vece_exc & env->fpc >> 24;
50     if (trap_exc) {
51         if (trap_exc & S390_IEEE_MASK_INVALID) {
52             return enr << 4 | VIC_INVALID;
53         } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) {
54             return enr << 4 | VIC_DIVBYZERO;
55         } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) {
56             return enr << 4 | VIC_OVERFLOW;
57         } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) {
58             return enr << 4 | VIC_UNDERFLOW;
59         } else if (!XxC) {
60             g_assert(trap_exc & S390_IEEE_MASK_INEXACT);
61             /* inexact has lowest priority on traps */
62             return enr << 4 | VIC_INEXACT;
63         }
64     }
65     return 0;
66 }
67 
68 static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
69                             uintptr_t retaddr)
70 {
71     if (vxc) {
72         /* on traps, the fpc flags are not updated, instruction is suppressed */
73         tcg_s390_vector_exception(env, vxc, retaddr);
74     }
75     if (vec_exc) {
76         /* indicate exceptions for all elements combined */
77         env->fpc |= vec_exc << 16;
78     }
79 }
80 
81 static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr)
82 {
83     return make_float32(s390_vec_read_element32(v, enr));
84 }
85 
86 static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr)
87 {
88     return make_float64(s390_vec_read_element64(v, enr));
89 }
90 
91 static float128 s390_vec_read_float128(const S390Vector *v)
92 {
93     return make_float128(s390_vec_read_element64(v, 0),
94                          s390_vec_read_element64(v, 1));
95 }
96 
97 static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data)
98 {
99     return s390_vec_write_element32(v, enr, data);
100 }
101 
102 static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data)
103 {
104     return s390_vec_write_element64(v, enr, data);
105 }
106 
107 static void s390_vec_write_float128(S390Vector *v, float128 data)
108 {
109     s390_vec_write_element64(v, 0, data.high);
110     s390_vec_write_element64(v, 1, data.low);
111 }
112 
113 typedef float32 (*vop32_2_fn)(float32 a, float_status *s);
114 static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
115                     bool s, bool XxC, uint8_t erm, vop32_2_fn fn,
116                     uintptr_t retaddr)
117 {
118     uint8_t vxc, vec_exc = 0;
119     S390Vector tmp = {};
120     int i, old_mode;
121 
122     old_mode = s390_swap_bfp_rounding_mode(env, erm);
123     for (i = 0; i < 4; i++) {
124         const float32 a = s390_vec_read_float32(v2, i);
125 
126         s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status));
127         vxc = check_ieee_exc(env, i, XxC, &vec_exc);
128         if (s || vxc) {
129             break;
130         }
131     }
132     s390_restore_bfp_rounding_mode(env, old_mode);
133     handle_ieee_exc(env, vxc, vec_exc, retaddr);
134     *v1 = tmp;
135 }
136 
137 typedef float64 (*vop64_2_fn)(float64 a, float_status *s);
138 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
139                     bool s, bool XxC, uint8_t erm, vop64_2_fn fn,
140                     uintptr_t retaddr)
141 {
142     uint8_t vxc, vec_exc = 0;
143     S390Vector tmp = {};
144     int i, old_mode;
145 
146     old_mode = s390_swap_bfp_rounding_mode(env, erm);
147     for (i = 0; i < 2; i++) {
148         const float64 a = s390_vec_read_float64(v2, i);
149 
150         s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status));
151         vxc = check_ieee_exc(env, i, XxC, &vec_exc);
152         if (s || vxc) {
153             break;
154         }
155     }
156     s390_restore_bfp_rounding_mode(env, old_mode);
157     handle_ieee_exc(env, vxc, vec_exc, retaddr);
158     *v1 = tmp;
159 }
160 
161 typedef float128 (*vop128_2_fn)(float128 a, float_status *s);
162 static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
163                     bool s, bool XxC, uint8_t erm, vop128_2_fn fn,
164                     uintptr_t retaddr)
165 {
166     const float128 a = s390_vec_read_float128(v2);
167     uint8_t vxc, vec_exc = 0;
168     S390Vector tmp = {};
169     int old_mode;
170 
171     old_mode = s390_swap_bfp_rounding_mode(env, erm);
172     s390_vec_write_float128(&tmp, fn(a, &env->fpu_status));
173     vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
174     s390_restore_bfp_rounding_mode(env, old_mode);
175     handle_ieee_exc(env, vxc, vec_exc, retaddr);
176     *v1 = tmp;
177 }
178 
179 static float64 vcdg64(float64 a, float_status *s)
180 {
181     return int64_to_float64(a, s);
182 }
183 
184 static float64 vcdlg64(float64 a, float_status *s)
185 {
186     return uint64_to_float64(a, s);
187 }
188 
189 static float64 vcgd64(float64 a, float_status *s)
190 {
191     const float64 tmp = float64_to_int64(a, s);
192 
193     return float64_is_any_nan(a) ? INT64_MIN : tmp;
194 }
195 
196 static float64 vclgd64(float64 a, float_status *s)
197 {
198     const float64 tmp = float64_to_uint64(a, s);
199 
200     return float64_is_any_nan(a) ? 0 : tmp;
201 }
202 
203 #define DEF_GVEC_VOP2_FN(NAME, FN, BITS)                                       \
204 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env,   \
205                                uint32_t desc)                                  \
206 {                                                                              \
207     const uint8_t erm = extract32(simd_data(desc), 4, 4);                      \
208     const bool se = extract32(simd_data(desc), 3, 1);                          \
209     const bool XxC = extract32(simd_data(desc), 2, 1);                         \
210                                                                                \
211     vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC());                     \
212 }
213 
214 #define DEF_GVEC_VOP2_64(NAME)                                                 \
215 DEF_GVEC_VOP2_FN(NAME, NAME##64, 64)
216 
217 #define DEF_GVEC_VOP2(NAME, OP)                                                \
218 DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32)                                       \
219 DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64)                                       \
220 DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128)
221 
222 DEF_GVEC_VOP2_64(vcdg)
223 DEF_GVEC_VOP2_64(vcdlg)
224 DEF_GVEC_VOP2_64(vcgd)
225 DEF_GVEC_VOP2_64(vclgd)
226 DEF_GVEC_VOP2(vfi, round_to_int)
227 DEF_GVEC_VOP2(vfsq, sqrt)
228 
229 typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s);
230 static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
231                     CPUS390XState *env, bool s, vop32_3_fn fn,
232                     uintptr_t retaddr)
233 {
234     uint8_t vxc, vec_exc = 0;
235     S390Vector tmp = {};
236     int i;
237 
238     for (i = 0; i < 4; i++) {
239         const float32 a = s390_vec_read_float32(v2, i);
240         const float32 b = s390_vec_read_float32(v3, i);
241 
242         s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status));
243         vxc = check_ieee_exc(env, i, false, &vec_exc);
244         if (s || vxc) {
245             break;
246         }
247     }
248     handle_ieee_exc(env, vxc, vec_exc, retaddr);
249     *v1 = tmp;
250 }
251 
252 typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s);
253 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
254                     CPUS390XState *env, bool s, vop64_3_fn fn,
255                     uintptr_t retaddr)
256 {
257     uint8_t vxc, vec_exc = 0;
258     S390Vector tmp = {};
259     int i;
260 
261     for (i = 0; i < 2; i++) {
262         const float64 a = s390_vec_read_float64(v2, i);
263         const float64 b = s390_vec_read_float64(v3, i);
264 
265         s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status));
266         vxc = check_ieee_exc(env, i, false, &vec_exc);
267         if (s || vxc) {
268             break;
269         }
270     }
271     handle_ieee_exc(env, vxc, vec_exc, retaddr);
272     *v1 = tmp;
273 }
274 
275 typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s);
276 static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
277                      CPUS390XState *env, bool s, vop128_3_fn fn,
278                      uintptr_t retaddr)
279 {
280     const float128 a = s390_vec_read_float128(v2);
281     const float128 b = s390_vec_read_float128(v3);
282     uint8_t vxc, vec_exc = 0;
283     S390Vector tmp = {};
284 
285     s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status));
286     vxc = check_ieee_exc(env, 0, false, &vec_exc);
287     handle_ieee_exc(env, vxc, vec_exc, retaddr);
288     *v1 = tmp;
289 }
290 
291 #define DEF_GVEC_VOP3_B(NAME, OP, BITS)                                        \
292 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
293                               CPUS390XState *env, uint32_t desc)               \
294 {                                                                              \
295     const bool se = extract32(simd_data(desc), 3, 1);                          \
296                                                                                \
297     vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC());           \
298 }
299 
300 #define DEF_GVEC_VOP3(NAME, OP)                                                \
301 DEF_GVEC_VOP3_B(NAME, OP, 32)                                                  \
302 DEF_GVEC_VOP3_B(NAME, OP, 64)                                                  \
303 DEF_GVEC_VOP3_B(NAME, OP, 128)
304 
305 DEF_GVEC_VOP3(vfa, add)
306 DEF_GVEC_VOP3(vfs, sub)
307 DEF_GVEC_VOP3(vfd, div)
308 DEF_GVEC_VOP3(vfm, mul)
309 
310 static int wfc64(const S390Vector *v1, const S390Vector *v2,
311                  CPUS390XState *env, bool signal, uintptr_t retaddr)
312 {
313     /* only the zero-indexed elements are compared */
314     const float64 a = s390_vec_read_float64(v1, 0);
315     const float64 b = s390_vec_read_float64(v2, 0);
316     uint8_t vxc, vec_exc = 0;
317     int cmp;
318 
319     if (signal) {
320         cmp = float64_compare(a, b, &env->fpu_status);
321     } else {
322         cmp = float64_compare_quiet(a, b, &env->fpu_status);
323     }
324     vxc = check_ieee_exc(env, 0, false, &vec_exc);
325     handle_ieee_exc(env, vxc, vec_exc, retaddr);
326 
327     return float_comp_to_cc(env, cmp);
328 }
329 
330 #define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS)                                     \
331 void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2,                 \
332                                CPUS390XState *env, uint32_t desc)              \
333 {                                                                              \
334     env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC());                      \
335 }
336 
337 #define DEF_GVEC_WFC(NAME, SIGNAL)                                             \
338      DEF_GVEC_WFC_B(NAME, SIGNAL, 64)
339 
340 DEF_GVEC_WFC(wfc, false)
341 DEF_GVEC_WFC(wfk, true)
342 
343 typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status);
344 static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
345                  CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr)
346 {
347     uint8_t vxc, vec_exc = 0;
348     S390Vector tmp = {};
349     int match = 0;
350     int i;
351 
352     for (i = 0; i < 4; i++) {
353         const float32 a = s390_vec_read_float32(v2, i);
354         const float32 b = s390_vec_read_float32(v3, i);
355 
356         /* swap the order of the parameters, so we can use existing functions */
357         if (fn(b, a, &env->fpu_status)) {
358             match++;
359             s390_vec_write_element32(&tmp, i, -1u);
360         }
361         vxc = check_ieee_exc(env, i, false, &vec_exc);
362         if (s || vxc) {
363             break;
364         }
365     }
366 
367     handle_ieee_exc(env, vxc, vec_exc, retaddr);
368     *v1 = tmp;
369     if (match) {
370         return s || match == 4 ? 0 : 1;
371     }
372     return 3;
373 }
374 
375 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status);
376 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
377                  CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr)
378 {
379     uint8_t vxc, vec_exc = 0;
380     S390Vector tmp = {};
381     int match = 0;
382     int i;
383 
384     for (i = 0; i < 2; i++) {
385         const float64 a = s390_vec_read_float64(v2, i);
386         const float64 b = s390_vec_read_float64(v3, i);
387 
388         /* swap the order of the parameters, so we can use existing functions */
389         if (fn(b, a, &env->fpu_status)) {
390             match++;
391             s390_vec_write_element64(&tmp, i, -1ull);
392         }
393         vxc = check_ieee_exc(env, i, false, &vec_exc);
394         if (s || vxc) {
395             break;
396         }
397     }
398 
399     handle_ieee_exc(env, vxc, vec_exc, retaddr);
400     *v1 = tmp;
401     if (match) {
402         return s || match == 2 ? 0 : 1;
403     }
404     return 3;
405 }
406 
407 typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status);
408 static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
409                  CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr)
410 {
411     const float128 a = s390_vec_read_float128(v2);
412     const float128 b = s390_vec_read_float128(v3);
413     uint8_t vxc, vec_exc = 0;
414     S390Vector tmp = {};
415     bool match = false;
416 
417     /* swap the order of the parameters, so we can use existing functions */
418     if (fn(b, a, &env->fpu_status)) {
419         match = true;
420         s390_vec_write_element64(&tmp, 0, -1ull);
421         s390_vec_write_element64(&tmp, 1, -1ull);
422     }
423     vxc = check_ieee_exc(env, 0, false, &vec_exc);
424     handle_ieee_exc(env, vxc, vec_exc, retaddr);
425     *v1 = tmp;
426     return match ? 0 : 3;
427 }
428 
429 #define DEF_GVEC_VFC_B(NAME, OP, BITS)                                         \
430 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
431                                CPUS390XState *env, uint32_t desc)              \
432 {                                                                              \
433     const bool se = extract32(simd_data(desc), 3, 1);                          \
434     const bool sq = extract32(simd_data(desc), 2, 1);                          \
435     vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet;  \
436                                                                                \
437     vfc##BITS(v1, v2, v3, env, se, fn, GETPC());                               \
438 }                                                                              \
439                                                                                \
440 void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3,  \
441                                     CPUS390XState *env, uint32_t desc)         \
442 {                                                                              \
443     const bool se = extract32(simd_data(desc), 3, 1);                          \
444     const bool sq = extract32(simd_data(desc), 2, 1);                          \
445     vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet;  \
446                                                                                \
447     env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC());                  \
448 }
449 
450 #define DEF_GVEC_VFC(NAME, OP)                                                 \
451 DEF_GVEC_VFC_B(NAME, OP, 32)                                                   \
452 DEF_GVEC_VFC_B(NAME, OP, 64)                                                   \
453 DEF_GVEC_VFC_B(NAME, OP, 128)                                                  \
454 
455 DEF_GVEC_VFC(vfce, eq)
456 DEF_GVEC_VFC(vfch, lt)
457 DEF_GVEC_VFC(vfche, le)
458 
459 void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
460                          uint32_t desc)
461 {
462     const bool s = extract32(simd_data(desc), 3, 1);
463     uint8_t vxc, vec_exc = 0;
464     S390Vector tmp = {};
465     int i;
466 
467     for (i = 0; i < 2; i++) {
468         /* load from even element */
469         const float32 a = s390_vec_read_element32(v2, i * 2);
470         const uint64_t ret = float32_to_float64(a, &env->fpu_status);
471 
472         s390_vec_write_element64(&tmp, i, ret);
473         /* indicate the source element */
474         vxc = check_ieee_exc(env, i * 2, false, &vec_exc);
475         if (s || vxc) {
476             break;
477         }
478     }
479     handle_ieee_exc(env, vxc, vec_exc, GETPC());
480     *(S390Vector *)v1 = tmp;
481 }
482 
483 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
484                          uint32_t desc)
485 {
486     const uint8_t erm = extract32(simd_data(desc), 4, 4);
487     const bool s = extract32(simd_data(desc), 3, 1);
488     const bool XxC = extract32(simd_data(desc), 2, 1);
489     uint8_t vxc, vec_exc = 0;
490     S390Vector tmp = {};
491     int i, old_mode;
492 
493     old_mode = s390_swap_bfp_rounding_mode(env, erm);
494     for (i = 0; i < 2; i++) {
495         float64 a = s390_vec_read_element64(v2, i);
496         uint32_t ret = float64_to_float32(a, &env->fpu_status);
497 
498         /* place at even element */
499         s390_vec_write_element32(&tmp, i * 2, ret);
500         /* indicate the source element */
501         vxc = check_ieee_exc(env, i, XxC, &vec_exc);
502         if (s || vxc) {
503             break;
504         }
505     }
506     s390_restore_bfp_rounding_mode(env, old_mode);
507     handle_ieee_exc(env, vxc, vec_exc, GETPC());
508     *(S390Vector *)v1 = tmp;
509 }
510 
511 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
512                    const S390Vector *v4, CPUS390XState *env, bool s, int flags,
513                    uintptr_t retaddr)
514 {
515     uint8_t vxc, vec_exc = 0;
516     S390Vector tmp = {};
517     int i;
518 
519     for (i = 0; i < 2; i++) {
520         const float64 a = s390_vec_read_float64(v2, i);
521         const float64 b = s390_vec_read_float64(v3, i);
522         const float64 c = s390_vec_read_float64(v4, i);
523         const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status);
524 
525         s390_vec_write_float64(&tmp, i, ret);
526         vxc = check_ieee_exc(env, i, false, &vec_exc);
527         if (s || vxc) {
528             break;
529         }
530     }
531     handle_ieee_exc(env, vxc, vec_exc, retaddr);
532     *v1 = tmp;
533 }
534 
535 #define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS)                                     \
536 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
537                                const void *v4, CPUS390XState *env,             \
538                                uint32_t desc)                                  \
539 {                                                                              \
540     const bool se = extract32(simd_data(desc), 3, 1);                          \
541                                                                                \
542     vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC());                       \
543 }
544 
545 #define DEF_GVEC_VFMA(NAME, FLAGS)                                             \
546     DEF_GVEC_VFMA_B(NAME, FLAGS, 64)
547 
548 DEF_GVEC_VFMA(vfma, 0)
549 DEF_GVEC_VFMA(vfms, float_muladd_negate_c)
550 
551 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
552                           uint32_t desc)
553 {
554     const uint16_t i3 = extract32(simd_data(desc), 4, 12);
555     const bool s = extract32(simd_data(desc), 3, 1);
556     int i, match = 0;
557 
558     for (i = 0; i < 2; i++) {
559         const float64 a = s390_vec_read_float64(v2, i);
560 
561         if (float64_dcmask(env, a) & i3) {
562             match++;
563             s390_vec_write_element64(v1, i, -1ull);
564         } else {
565             s390_vec_write_element64(v1, i, 0);
566         }
567         if (s) {
568             break;
569         }
570     }
571 
572     if (match == 2 || (s && match)) {
573         env->cc_op = 0;
574     } else if (match) {
575         env->cc_op = 1;
576     } else {
577         env->cc_op = 3;
578     }
579 }
580