1 /*
2 * QEMU TCG support -- s390x vector floating point instruction support
3 *
4 * Copyright (C) 2019 Red Hat Inc
5 *
6 * Authors:
7 * David Hildenbrand <david@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12 #include "qemu/osdep.h"
13 #include "cpu.h"
14 #include "s390x-internal.h"
15 #include "vec.h"
16 #include "tcg_s390x.h"
17 #include "tcg/tcg-gvec-desc.h"
18 #include "exec/helper-proto.h"
19 #include "fpu/softfloat.h"
20
/*
 * Exception codes that check_ieee_exc() ORs into the low bits of the value
 * it returns (the element number is placed above them via "enr << 4").
 */
#define VIC_INVALID 0x1   /* used for S390_IEEE_MASK_INVALID */
#define VIC_DIVBYZERO 0x2 /* used for S390_IEEE_MASK_DIVBYZERO */
#define VIC_OVERFLOW 0x3  /* used for S390_IEEE_MASK_OVERFLOW */
#define VIC_UNDERFLOW 0x4 /* used for S390_IEEE_MASK_UNDERFLOW */
#define VIC_INEXACT 0x5   /* used for S390_IEEE_MASK_INEXACT */
26
27 /* returns the VEX. If the VEX is 0, there is no trap */
check_ieee_exc(CPUS390XState * env,uint8_t enr,bool XxC,uint8_t * vec_exc)28 static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC,
29 uint8_t *vec_exc)
30 {
31 uint8_t vece_exc = 0, trap_exc;
32 unsigned qemu_exc;
33
34 /* Retrieve and clear the softfloat exceptions */
35 qemu_exc = env->fpu_status.float_exception_flags;
36 if (qemu_exc == 0) {
37 return 0;
38 }
39 env->fpu_status.float_exception_flags = 0;
40
41 vece_exc = s390_softfloat_exc_to_ieee(qemu_exc);
42
43 /* Add them to the vector-wide s390x exception bits */
44 *vec_exc |= vece_exc;
45
46 /* Check for traps and construct the VXC */
47 trap_exc = vece_exc & env->fpc >> 24;
48 if (trap_exc) {
49 if (trap_exc & S390_IEEE_MASK_INVALID) {
50 return enr << 4 | VIC_INVALID;
51 } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) {
52 return enr << 4 | VIC_DIVBYZERO;
53 } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) {
54 return enr << 4 | VIC_OVERFLOW;
55 } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) {
56 return enr << 4 | VIC_UNDERFLOW;
57 } else if (!XxC) {
58 g_assert(trap_exc & S390_IEEE_MASK_INEXACT);
59 /* inexact has lowest priority on traps */
60 return enr << 4 | VIC_INEXACT;
61 }
62 }
63 return 0;
64 }
65
/*
 * Act on the outcome of per-element exception checking.
 *
 * @vxc:     non-zero requests a vector exception via
 *           tcg_s390_vector_exception()
 * @vec_exc: exception bits collected over all processed elements; they are
 *           ORed into env->fpc shifted left by 16
 * @retaddr: passed through to tcg_s390_vector_exception()
 */
static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
                            uintptr_t retaddr)
{
    if (vxc != 0) {
        /* on traps, the fpc flags are not updated, instruction is suppressed */
        tcg_s390_vector_exception(env, vxc, retaddr);
    }
    if (vec_exc == 0) {
        return;
    }
    /* indicate exceptions for all elements combined */
    env->fpc |= vec_exc << 16;
}
78
s390_vec_read_float32(const S390Vector * v,uint8_t enr)79 static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr)
80 {
81 return make_float32(s390_vec_read_element32(v, enr));
82 }
83
s390_vec_read_float64(const S390Vector * v,uint8_t enr)84 static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr)
85 {
86 return make_float64(s390_vec_read_element64(v, enr));
87 }
88
s390_vec_read_float128(const S390Vector * v)89 static float128 s390_vec_read_float128(const S390Vector *v)
90 {
91 return make_float128(s390_vec_read_element64(v, 0),
92 s390_vec_read_element64(v, 1));
93 }
94
/*
 * Store the float32 value @data into 32-bit element @enr of @v.
 *
 * Plain call instead of "return <void expression>": a return statement with
 * an expression is not permitted in a function returning void, and this
 * matches how s390_vec_write_float128() is written.
 */
static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data)
{
    s390_vec_write_element32(v, enr, data);
}
99
/*
 * Store the float64 value @data into 64-bit element @enr of @v.
 *
 * Plain call instead of "return <void expression>": a return statement with
 * an expression is not permitted in a function returning void, and this
 * matches how s390_vec_write_float128() is written.
 */
static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data)
{
    s390_vec_write_element64(v, enr, data);
}
104
/*
 * Store a float128 across the whole vector: data.high goes to 64-bit
 * element 0, data.low to element 1.
 */
static void s390_vec_write_float128(S390Vector *v, float128 data)
{
    const uint64_t hi = data.high;
    const uint64_t lo = data.low;

    s390_vec_write_element64(v, 0, hi);
    s390_vec_write_element64(v, 1, lo);
}
110
111 typedef float32 (*vop32_2_fn)(float32 a, float_status *s);
vop32_2(S390Vector * v1,const S390Vector * v2,CPUS390XState * env,bool s,bool XxC,uint8_t erm,vop32_2_fn fn,uintptr_t retaddr)112 static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
113 bool s, bool XxC, uint8_t erm, vop32_2_fn fn,
114 uintptr_t retaddr)
115 {
116 uint8_t vxc, vec_exc = 0;
117 S390Vector tmp = {};
118 int i, old_mode;
119
120 old_mode = s390_swap_bfp_rounding_mode(env, erm);
121 for (i = 0; i < 4; i++) {
122 const float32 a = s390_vec_read_float32(v2, i);
123
124 s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status));
125 vxc = check_ieee_exc(env, i, XxC, &vec_exc);
126 if (s || vxc) {
127 break;
128 }
129 }
130 s390_restore_bfp_rounding_mode(env, old_mode);
131 handle_ieee_exc(env, vxc, vec_exc, retaddr);
132 *v1 = tmp;
133 }
134
135 typedef float64 (*vop64_2_fn)(float64 a, float_status *s);
vop64_2(S390Vector * v1,const S390Vector * v2,CPUS390XState * env,bool s,bool XxC,uint8_t erm,vop64_2_fn fn,uintptr_t retaddr)136 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
137 bool s, bool XxC, uint8_t erm, vop64_2_fn fn,
138 uintptr_t retaddr)
139 {
140 uint8_t vxc, vec_exc = 0;
141 S390Vector tmp = {};
142 int i, old_mode;
143
144 old_mode = s390_swap_bfp_rounding_mode(env, erm);
145 for (i = 0; i < 2; i++) {
146 const float64 a = s390_vec_read_float64(v2, i);
147
148 s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status));
149 vxc = check_ieee_exc(env, i, XxC, &vec_exc);
150 if (s || vxc) {
151 break;
152 }
153 }
154 s390_restore_bfp_rounding_mode(env, old_mode);
155 handle_ieee_exc(env, vxc, vec_exc, retaddr);
156 *v1 = tmp;
157 }
158
159 typedef float128 (*vop128_2_fn)(float128 a, float_status *s);
vop128_2(S390Vector * v1,const S390Vector * v2,CPUS390XState * env,bool s,bool XxC,uint8_t erm,vop128_2_fn fn,uintptr_t retaddr)160 static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
161 bool s, bool XxC, uint8_t erm, vop128_2_fn fn,
162 uintptr_t retaddr)
163 {
164 const float128 a = s390_vec_read_float128(v2);
165 uint8_t vxc, vec_exc = 0;
166 S390Vector tmp = {};
167 int old_mode;
168
169 old_mode = s390_swap_bfp_rounding_mode(env, erm);
170 s390_vec_write_float128(&tmp, fn(a, &env->fpu_status));
171 vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
172 s390_restore_bfp_rounding_mode(env, old_mode);
173 handle_ieee_exc(env, vxc, vec_exc, retaddr);
174 *v1 = tmp;
175 }
176
vcdg32(float32 a,float_status * s)177 static float32 vcdg32(float32 a, float_status *s)
178 {
179 return int32_to_float32(a, s);
180 }
181
vcdlg32(float32 a,float_status * s)182 static float32 vcdlg32(float32 a, float_status *s)
183 {
184 return uint32_to_float32(a, s);
185 }
186
vcgd32(float32 a,float_status * s)187 static float32 vcgd32(float32 a, float_status *s)
188 {
189 const float32 tmp = float32_to_int32(a, s);
190
191 return float32_is_any_nan(a) ? INT32_MIN : tmp;
192 }
193
vclgd32(float32 a,float_status * s)194 static float32 vclgd32(float32 a, float_status *s)
195 {
196 const float32 tmp = float32_to_uint32(a, s);
197
198 return float32_is_any_nan(a) ? 0 : tmp;
199 }
200
vcdg64(float64 a,float_status * s)201 static float64 vcdg64(float64 a, float_status *s)
202 {
203 return int64_to_float64(a, s);
204 }
205
vcdlg64(float64 a,float_status * s)206 static float64 vcdlg64(float64 a, float_status *s)
207 {
208 return uint64_to_float64(a, s);
209 }
210
vcgd64(float64 a,float_status * s)211 static float64 vcgd64(float64 a, float_status *s)
212 {
213 const float64 tmp = float64_to_int64(a, s);
214
215 return float64_is_any_nan(a) ? INT64_MIN : tmp;
216 }
217
vclgd64(float64 a,float_status * s)218 static float64 vclgd64(float64 a, float_status *s)
219 {
220 const float64 tmp = float64_to_uint64(a, s);
221
222 return float64_is_any_nan(a) ? 0 : tmp;
223 }
224
/*
 * Define HELPER(gvec_{NAME}{BITS}), a two-operand vector helper.
 *
 * The helper pulls three fields out of simd_data(desc) with extract32():
 * erm with arguments (4, 4), se with (3, 1) and XxC with (2, 1). It then
 * forwards them, together with FN and GETPC(), to vop{BITS}_2().
 */
#define DEF_GVEC_VOP2_FN(NAME, FN, BITS) \
void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env, \
                               uint32_t desc) \
{ \
    const uint8_t erm = extract32(simd_data(desc), 4, 4); \
    const bool se = extract32(simd_data(desc), 3, 1); \
    const bool XxC = extract32(simd_data(desc), 2, 1); \
    \
    vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); \
}

/* 32-bit helper whose per-element function is the local {NAME}32 */
#define DEF_GVEC_VOP2_32(NAME) \
DEF_GVEC_VOP2_FN(NAME, NAME##32, 32)

/* 64-bit helper whose per-element function is the local {NAME}64 */
#define DEF_GVEC_VOP2_64(NAME) \
DEF_GVEC_VOP2_FN(NAME, NAME##64, 64)

/* 32/64/128-bit helpers that call float{BITS}_{OP} directly */
#define DEF_GVEC_VOP2(NAME, OP) \
DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32) \
DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64) \
DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128)

/* Helpers backed by the vcdg/vcdlg/vcgd/vclgd adapter functions */
DEF_GVEC_VOP2_32(vcdg)
DEF_GVEC_VOP2_32(vcdlg)
DEF_GVEC_VOP2_32(vcgd)
DEF_GVEC_VOP2_32(vclgd)
DEF_GVEC_VOP2_64(vcdg)
DEF_GVEC_VOP2_64(vcdlg)
DEF_GVEC_VOP2_64(vcgd)
DEF_GVEC_VOP2_64(vclgd)
/* Helpers backed by float{BITS}_round_to_int and float{BITS}_sqrt */
DEF_GVEC_VOP2(vfi, round_to_int)
DEF_GVEC_VOP2(vfsq, sqrt)
257
258 typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s);
vop32_3(S390Vector * v1,const S390Vector * v2,const S390Vector * v3,CPUS390XState * env,bool s,vop32_3_fn fn,uintptr_t retaddr)259 static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
260 CPUS390XState *env, bool s, vop32_3_fn fn,
261 uintptr_t retaddr)
262 {
263 uint8_t vxc, vec_exc = 0;
264 S390Vector tmp = {};
265 int i;
266
267 for (i = 0; i < 4; i++) {
268 const float32 a = s390_vec_read_float32(v2, i);
269 const float32 b = s390_vec_read_float32(v3, i);
270
271 s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status));
272 vxc = check_ieee_exc(env, i, false, &vec_exc);
273 if (s || vxc) {
274 break;
275 }
276 }
277 handle_ieee_exc(env, vxc, vec_exc, retaddr);
278 *v1 = tmp;
279 }
280
281 typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s);
vop64_3(S390Vector * v1,const S390Vector * v2,const S390Vector * v3,CPUS390XState * env,bool s,vop64_3_fn fn,uintptr_t retaddr)282 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
283 CPUS390XState *env, bool s, vop64_3_fn fn,
284 uintptr_t retaddr)
285 {
286 uint8_t vxc, vec_exc = 0;
287 S390Vector tmp = {};
288 int i;
289
290 for (i = 0; i < 2; i++) {
291 const float64 a = s390_vec_read_float64(v2, i);
292 const float64 b = s390_vec_read_float64(v3, i);
293
294 s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status));
295 vxc = check_ieee_exc(env, i, false, &vec_exc);
296 if (s || vxc) {
297 break;
298 }
299 }
300 handle_ieee_exc(env, vxc, vec_exc, retaddr);
301 *v1 = tmp;
302 }
303
304 typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s);
vop128_3(S390Vector * v1,const S390Vector * v2,const S390Vector * v3,CPUS390XState * env,bool s,vop128_3_fn fn,uintptr_t retaddr)305 static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
306 CPUS390XState *env, bool s, vop128_3_fn fn,
307 uintptr_t retaddr)
308 {
309 const float128 a = s390_vec_read_float128(v2);
310 const float128 b = s390_vec_read_float128(v3);
311 uint8_t vxc, vec_exc = 0;
312 S390Vector tmp = {};
313
314 s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status));
315 vxc = check_ieee_exc(env, 0, false, &vec_exc);
316 handle_ieee_exc(env, vxc, vec_exc, retaddr);
317 *v1 = tmp;
318 }
319
/*
 * Define HELPER(gvec_{NAME}{BITS}), a three-operand vector helper that reads
 * se from simd_data(desc) via extract32(..., 3, 1) and calls vop{BITS}_3()
 * with float{BITS}_{OP} as the per-element function.
 */
#define DEF_GVEC_VOP3_B(NAME, OP, BITS) \
void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
                               CPUS390XState *env, uint32_t desc) \
{ \
    const bool se = extract32(simd_data(desc), 3, 1); \
    \
    vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC()); \
}

/* Instantiate the 32-, 64- and 128-bit variants of one operation */
#define DEF_GVEC_VOP3(NAME, OP) \
DEF_GVEC_VOP3_B(NAME, OP, 32) \
DEF_GVEC_VOP3_B(NAME, OP, 64) \
DEF_GVEC_VOP3_B(NAME, OP, 128)

/* add, sub, div and mul helpers */
DEF_GVEC_VOP3(vfa, add)
DEF_GVEC_VOP3(vfs, sub)
DEF_GVEC_VOP3(vfd, div)
DEF_GVEC_VOP3(vfm, mul)
338
339 static int wfc32(const S390Vector *v1, const S390Vector *v2,
340 CPUS390XState *env, bool signal, uintptr_t retaddr)
341 {
342 /* only the zero-indexed elements are compared */
343 const float32 a = s390_vec_read_float32(v1, 0);
344 const float32 b = s390_vec_read_float32(v2, 0);
345 uint8_t vxc, vec_exc = 0;
346 int cmp;
347
348 if (signal) {
349 cmp = float32_compare(a, b, &env->fpu_status);
350 } else {
351 cmp = float32_compare_quiet(a, b, &env->fpu_status);
352 }
353 vxc = check_ieee_exc(env, 0, false, &vec_exc);
354 handle_ieee_exc(env, vxc, vec_exc, retaddr);
355
356 return float_comp_to_cc(env, cmp);
357 }
358
/*
 * Compare float64 element 0 of @v1 with element 0 of @v2.
 *
 * @signal selects float64_compare() over float64_compare_quiet(). Pending
 * exceptions are processed before the comparison result is mapped through
 * float_comp_to_cc(), whose value is returned.
 */
static int wfc64(const S390Vector *v1, const S390Vector *v2,
                 CPUS390XState *env, bool signal, uintptr_t retaddr)
{
    /* only the zero-indexed elements are compared */
    const float64 lhs = s390_vec_read_float64(v1, 0);
    const float64 rhs = s390_vec_read_float64(v2, 0);
    uint8_t exc_bits = 0;
    uint8_t vxc;
    const int relation = signal
        ? float64_compare(lhs, rhs, &env->fpu_status)
        : float64_compare_quiet(lhs, rhs, &env->fpu_status);

    vxc = check_ieee_exc(env, 0, false, &exc_bits);
    handle_ieee_exc(env, vxc, exc_bits, retaddr);

    return float_comp_to_cc(env, relation);
}
378
/*
 * Compare the float128 held in @v1 with the float128 held in @v2.
 *
 * @signal selects float128_compare() over float128_compare_quiet(). Pending
 * exceptions are processed before the comparison result is mapped through
 * float_comp_to_cc(), whose value is returned.
 */
static int wfc128(const S390Vector *v1, const S390Vector *v2,
                  CPUS390XState *env, bool signal, uintptr_t retaddr)
{
    /* only the zero-indexed elements are compared */
    const float128 lhs = s390_vec_read_float128(v1);
    const float128 rhs = s390_vec_read_float128(v2);
    uint8_t exc_bits = 0;
    uint8_t vxc;
    const int relation = signal
        ? float128_compare(lhs, rhs, &env->fpu_status)
        : float128_compare_quiet(lhs, rhs, &env->fpu_status);

    vxc = check_ieee_exc(env, 0, false, &exc_bits);
    handle_ieee_exc(env, vxc, exc_bits, retaddr);

    return float_comp_to_cc(env, relation);
}
398
/*
 * Define HELPER(gvec_{NAME}{BITS}): call wfc{BITS}() with the compile-time
 * SIGNAL flag and store its return value in env->cc_op. desc is not used.
 */
#define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \
void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2, \
                               CPUS390XState *env, uint32_t desc) \
{ \
    env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC()); \
}

/* Instantiate the 32-, 64- and 128-bit variants of one compare helper */
#define DEF_GVEC_WFC(NAME, SIGNAL) \
DEF_GVEC_WFC_B(NAME, SIGNAL, 32) \
DEF_GVEC_WFC_B(NAME, SIGNAL, 64) \
DEF_GVEC_WFC_B(NAME, SIGNAL, 128)

/* wfc uses the quiet compare, wfk the signaling one */
DEF_GVEC_WFC(wfc, false)
DEF_GVEC_WFC(wfk, true)
413
414 typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status);
vfc32(S390Vector * v1,const S390Vector * v2,const S390Vector * v3,CPUS390XState * env,bool s,vfc32_fn fn,uintptr_t retaddr)415 static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
416 CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr)
417 {
418 uint8_t vxc, vec_exc = 0;
419 S390Vector tmp = {};
420 int match = 0;
421 int i;
422
423 for (i = 0; i < 4; i++) {
424 const float32 a = s390_vec_read_float32(v2, i);
425 const float32 b = s390_vec_read_float32(v3, i);
426
427 /* swap the order of the parameters, so we can use existing functions */
428 if (fn(b, a, &env->fpu_status)) {
429 match++;
430 s390_vec_write_element32(&tmp, i, -1u);
431 }
432 vxc = check_ieee_exc(env, i, false, &vec_exc);
433 if (s || vxc) {
434 break;
435 }
436 }
437
438 handle_ieee_exc(env, vxc, vec_exc, retaddr);
439 *v1 = tmp;
440 if (match) {
441 return s || match == 4 ? 0 : 1;
442 }
443 return 3;
444 }
445
446 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status);
vfc64(S390Vector * v1,const S390Vector * v2,const S390Vector * v3,CPUS390XState * env,bool s,vfc64_fn fn,uintptr_t retaddr)447 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
448 CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr)
449 {
450 uint8_t vxc, vec_exc = 0;
451 S390Vector tmp = {};
452 int match = 0;
453 int i;
454
455 for (i = 0; i < 2; i++) {
456 const float64 a = s390_vec_read_float64(v2, i);
457 const float64 b = s390_vec_read_float64(v3, i);
458
459 /* swap the order of the parameters, so we can use existing functions */
460 if (fn(b, a, &env->fpu_status)) {
461 match++;
462 s390_vec_write_element64(&tmp, i, -1ull);
463 }
464 vxc = check_ieee_exc(env, i, false, &vec_exc);
465 if (s || vxc) {
466 break;
467 }
468 }
469
470 handle_ieee_exc(env, vxc, vec_exc, retaddr);
471 *v1 = tmp;
472 if (match) {
473 return s || match == 2 ? 0 : 1;
474 }
475 return 3;
476 }
477
478 typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status);
vfc128(S390Vector * v1,const S390Vector * v2,const S390Vector * v3,CPUS390XState * env,bool s,vfc128_fn fn,uintptr_t retaddr)479 static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
480 CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr)
481 {
482 const float128 a = s390_vec_read_float128(v2);
483 const float128 b = s390_vec_read_float128(v3);
484 uint8_t vxc, vec_exc = 0;
485 S390Vector tmp = {};
486 bool match = false;
487
488 /* swap the order of the parameters, so we can use existing functions */
489 if (fn(b, a, &env->fpu_status)) {
490 match = true;
491 s390_vec_write_element64(&tmp, 0, -1ull);
492 s390_vec_write_element64(&tmp, 1, -1ull);
493 }
494 vxc = check_ieee_exc(env, 0, false, &vec_exc);
495 handle_ieee_exc(env, vxc, vec_exc, retaddr);
496 *v1 = tmp;
497 return match ? 0 : 3;
498 }
499
/*
 * Define two helpers per NAME/BITS pair:
 *   HELPER(gvec_{NAME}{BITS})    - runs vfc{BITS}() and discards its result
 *   HELPER(gvec_{NAME}{BITS}_cc) - same, but stores the result in env->cc_op
 *
 * Both read se via extract32(simd_data(desc), 3, 1) and sq via
 * extract32(simd_data(desc), 2, 1). With sq set the comparison function is
 * float{BITS}_{OP}, otherwise float{BITS}_{OP}_quiet.
 */
#define DEF_GVEC_VFC_B(NAME, OP, BITS) \
void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
                               CPUS390XState *env, uint32_t desc) \
{ \
    const bool se = extract32(simd_data(desc), 3, 1); \
    const bool sq = extract32(simd_data(desc), 2, 1); \
    vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \
    \
    vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \
} \
    \
void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3, \
                                    CPUS390XState *env, uint32_t desc) \
{ \
    const bool se = extract32(simd_data(desc), 3, 1); \
    const bool sq = extract32(simd_data(desc), 2, 1); \
    vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \
    \
    env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \
}

/* Instantiate the 32-, 64- and 128-bit variants of one comparison */
#define DEF_GVEC_VFC(NAME, OP) \
DEF_GVEC_VFC_B(NAME, OP, 32) \
DEF_GVEC_VFC_B(NAME, OP, 64) \
DEF_GVEC_VFC_B(NAME, OP, 128) \

/* eq, lt and le based compare helpers */
DEF_GVEC_VFC(vfce, eq)
DEF_GVEC_VFC(vfch, lt)
DEF_GVEC_VFC(vfche, le)
529
530 void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
531 uint32_t desc)
532 {
533 const bool s = extract32(simd_data(desc), 3, 1);
534 uint8_t vxc, vec_exc = 0;
535 S390Vector tmp = {};
536 int i;
537
538 for (i = 0; i < 2; i++) {
539 /* load from even element */
540 const float32 a = s390_vec_read_element32(v2, i * 2);
541 const uint64_t ret = float32_to_float64(a, &env->fpu_status);
542
543 s390_vec_write_element64(&tmp, i, ret);
544 /* indicate the source element */
545 vxc = check_ieee_exc(env, i * 2, false, &vec_exc);
546 if (s || vxc) {
547 break;
548 }
549 }
550 handle_ieee_exc(env, vxc, vec_exc, GETPC());
551 *(S390Vector *)v1 = tmp;
552 }
553
HELPER(gvec_vfll64)554 void HELPER(gvec_vfll64)(void *v1, const void *v2, CPUS390XState *env,
555 uint32_t desc)
556 {
557 /* load from even element */
558 const float128 ret = float64_to_float128(s390_vec_read_float64(v2, 0),
559 &env->fpu_status);
560 uint8_t vxc, vec_exc = 0;
561
562 vxc = check_ieee_exc(env, 0, false, &vec_exc);
563 handle_ieee_exc(env, vxc, vec_exc, GETPC());
564 s390_vec_write_float128(v1, ret);
565 }
566
HELPER(gvec_vflr64)567 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
568 uint32_t desc)
569 {
570 const uint8_t erm = extract32(simd_data(desc), 4, 4);
571 const bool s = extract32(simd_data(desc), 3, 1);
572 const bool XxC = extract32(simd_data(desc), 2, 1);
573 uint8_t vxc, vec_exc = 0;
574 S390Vector tmp = {};
575 int i, old_mode;
576
577 old_mode = s390_swap_bfp_rounding_mode(env, erm);
578 for (i = 0; i < 2; i++) {
579 float64 a = s390_vec_read_element64(v2, i);
580 uint32_t ret = float64_to_float32(a, &env->fpu_status);
581
582 /* place at even element */
583 s390_vec_write_element32(&tmp, i * 2, ret);
584 /* indicate the source element */
585 vxc = check_ieee_exc(env, i, XxC, &vec_exc);
586 if (s || vxc) {
587 break;
588 }
589 }
590 s390_restore_bfp_rounding_mode(env, old_mode);
591 handle_ieee_exc(env, vxc, vec_exc, GETPC());
592 *(S390Vector *)v1 = tmp;
593 }
594
HELPER(gvec_vflr128)595 void HELPER(gvec_vflr128)(void *v1, const void *v2, CPUS390XState *env,
596 uint32_t desc)
597 {
598 const uint8_t erm = extract32(simd_data(desc), 4, 4);
599 const bool XxC = extract32(simd_data(desc), 2, 1);
600 uint8_t vxc, vec_exc = 0;
601 int old_mode;
602 float64 ret;
603
604 old_mode = s390_swap_bfp_rounding_mode(env, erm);
605 ret = float128_to_float64(s390_vec_read_float128(v2), &env->fpu_status);
606 vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
607 s390_restore_bfp_rounding_mode(env, old_mode);
608 handle_ieee_exc(env, vxc, vec_exc, GETPC());
609
610 /* place at even element, odd element is unpredictable */
611 s390_vec_write_float64(v1, 0, ret);
612 }
613
/*
 * Per-element float32 fused multiply-add: for each element,
 * float32_muladd(v3[i], v2[i], v4[i], flags) is stored into @v1.
 *
 * With @s set, only element 0 is processed. Processing also stops at the
 * first element for which check_ieee_exc() reports a VXC. Elements not
 * processed are left zero in the result.
 */
static void vfma32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
                   const S390Vector *v4, CPUS390XState *env, bool s, int flags,
                   uintptr_t retaddr)
{
    S390Vector result = {};
    uint8_t exc_bits = 0;
    uint8_t vxc;
    int enr = 0;

    do {
        /* note the operand order: v3 is the first muladd argument */
        const float32 factor1 = s390_vec_read_float32(v3, enr);
        const float32 factor2 = s390_vec_read_float32(v2, enr);
        const float32 addend = s390_vec_read_float32(v4, enr);
        float32 out = float32_muladd(factor1, factor2, addend, flags,
                                     &env->fpu_status);

        s390_vec_write_float32(&result, enr, out);
        vxc = check_ieee_exc(env, enr, false, &exc_bits);
    } while (!s && !vxc && ++enr < 4);
    handle_ieee_exc(env, vxc, exc_bits, retaddr);
    *v1 = result;
}
637
/*
 * Per-element float64 fused multiply-add: for each element,
 * float64_muladd(v3[i], v2[i], v4[i], flags) is stored into @v1.
 *
 * With @s set, only element 0 is processed. Processing also stops at the
 * first element for which check_ieee_exc() reports a VXC. Elements not
 * processed are left zero in the result.
 */
static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
                   const S390Vector *v4, CPUS390XState *env, bool s, int flags,
                   uintptr_t retaddr)
{
    S390Vector result = {};
    uint8_t exc_bits = 0;
    uint8_t vxc;
    int enr = 0;

    do {
        /* note the operand order: v3 is the first muladd argument */
        const float64 factor1 = s390_vec_read_float64(v3, enr);
        const float64 factor2 = s390_vec_read_float64(v2, enr);
        const float64 addend = s390_vec_read_float64(v4, enr);
        const float64 out = float64_muladd(factor1, factor2, addend, flags,
                                           &env->fpu_status);

        s390_vec_write_float64(&result, enr, out);
        vxc = check_ieee_exc(env, enr, false, &exc_bits);
    } while (!s && !vxc && ++enr < 2);
    handle_ieee_exc(env, vxc, exc_bits, retaddr);
    *v1 = result;
}
661
/*
 * float128 fused multiply-add: float128_muladd(v3, v2, v4, flags) is stored
 * into @v1 after exception handling. @s is not used by this variant.
 */
static void vfma128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
                    const S390Vector *v4, CPUS390XState *env, bool s, int flags,
                    uintptr_t retaddr)
{
    /* note the operand order: v3 is the first muladd argument */
    const float128 factor1 = s390_vec_read_float128(v3);
    const float128 factor2 = s390_vec_read_float128(v2);
    const float128 addend = s390_vec_read_float128(v4);
    uint8_t exc_bits = 0;
    uint8_t vxc;
    float128 out;

    out = float128_muladd(factor1, factor2, addend, flags, &env->fpu_status);
    vxc = check_ieee_exc(env, 0, false, &exc_bits);
    handle_ieee_exc(env, vxc, exc_bits, retaddr);
    s390_vec_write_float128(v1, out);
}
677
/*
 * Define HELPER(gvec_{NAME}{BITS}), a four-operand vector helper that reads
 * se via extract32(simd_data(desc), 3, 1) and calls vfma{BITS}() with the
 * compile-time muladd FLAGS.
 */
#define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS) \
void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
                               const void *v4, CPUS390XState *env, \
                               uint32_t desc) \
{ \
    const bool se = extract32(simd_data(desc), 3, 1); \
    \
    vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC()); \
}

/* Instantiate the 32-, 64- and 128-bit variants of one muladd flavour */
#define DEF_GVEC_VFMA(NAME, FLAGS) \
DEF_GVEC_VFMA_B(NAME, FLAGS, 32) \
DEF_GVEC_VFMA_B(NAME, FLAGS, 64) \
DEF_GVEC_VFMA_B(NAME, FLAGS, 128)

/* The four flavours differ only in the float_muladd_* flags passed on */
DEF_GVEC_VFMA(vfma, 0)
DEF_GVEC_VFMA(vfms, float_muladd_negate_c)
DEF_GVEC_VFMA(vfnma, float_muladd_negate_result)
DEF_GVEC_VFMA(vfnms, float_muladd_negate_c | float_muladd_negate_result)
697
698 void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env,
699 uint32_t desc)
700 {
701 uint16_t i3 = extract32(simd_data(desc), 4, 12);
702 bool s = extract32(simd_data(desc), 3, 1);
703 int i, match = 0;
704
705 for (i = 0; i < 4; i++) {
706 float32 a = s390_vec_read_float32(v2, i);
707
708 if (float32_dcmask(env, a) & i3) {
709 match++;
710 s390_vec_write_element32(v1, i, -1u);
711 } else {
712 s390_vec_write_element32(v1, i, 0);
713 }
714 if (s) {
715 break;
716 }
717 }
718
719 if (match == 4 || (s && match)) {
720 env->cc_op = 0;
721 } else if (match) {
722 env->cc_op = 1;
723 } else {
724 env->cc_op = 3;
725 }
726 }
727
HELPER(gvec_vftci64)728 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
729 uint32_t desc)
730 {
731 const uint16_t i3 = extract32(simd_data(desc), 4, 12);
732 const bool s = extract32(simd_data(desc), 3, 1);
733 int i, match = 0;
734
735 for (i = 0; i < 2; i++) {
736 const float64 a = s390_vec_read_float64(v2, i);
737
738 if (float64_dcmask(env, a) & i3) {
739 match++;
740 s390_vec_write_element64(v1, i, -1ull);
741 } else {
742 s390_vec_write_element64(v1, i, 0);
743 }
744 if (s) {
745 break;
746 }
747 }
748
749 if (match == 2 || (s && match)) {
750 env->cc_op = 0;
751 } else if (match) {
752 env->cc_op = 1;
753 } else {
754 env->cc_op = 3;
755 }
756 }
757
HELPER(gvec_vftci128)758 void HELPER(gvec_vftci128)(void *v1, const void *v2, CPUS390XState *env,
759 uint32_t desc)
760 {
761 const float128 a = s390_vec_read_float128(v2);
762 uint16_t i3 = extract32(simd_data(desc), 4, 12);
763
764 if (float128_dcmask(env, a) & i3) {
765 env->cc_op = 0;
766 s390_vec_write_element64(v1, 0, -1ull);
767 s390_vec_write_element64(v1, 1, -1ull);
768 } else {
769 env->cc_op = 3;
770 s390_vec_write_element64(v1, 0, 0);
771 s390_vec_write_element64(v1, 1, 0);
772 }
773 }
774
/*
 * Flavour of min/max semantics requested by the caller.
 *
 * IEEE is handled directly with the softfloat minnum/maxnum (and, for
 * absolute values, minnummag/maxnummag) functions. Every other type goes
 * through vfmin_res()/vfmax_res(), which special-case NaN and zero operands
 * per type.
 */
typedef enum S390MinMaxType {
    S390_MINMAX_TYPE_IEEE = 0,
    S390_MINMAX_TYPE_JAVA,
    S390_MINMAX_TYPE_C_MACRO,
    S390_MINMAX_TYPE_CPP,
    S390_MINMAX_TYPE_F,
} S390MinMaxType;
782
/* Decision returned by vfmin_res()/vfmax_res() on how to form the result */
typedef enum S390MinMaxRes {
    S390_MINMAX_RES_MINMAX = 0, /* compute with the softfloat min/max */
    S390_MINMAX_RES_A,          /* result is operand a unchanged */
    S390_MINMAX_RES_B,          /* result is operand b unchanged */
    S390_MINMAX_RES_SILENCE_A,  /* result is operand a after silence_nan */
    S390_MINMAX_RES_SILENCE_B,  /* result is operand b after silence_nan */
} S390MinMaxRes;
790
vfmin_res(uint16_t dcmask_a,uint16_t dcmask_b,S390MinMaxType type,float_status * s)791 static S390MinMaxRes vfmin_res(uint16_t dcmask_a, uint16_t dcmask_b,
792 S390MinMaxType type, float_status *s)
793 {
794 const bool neg_a = dcmask_a & DCMASK_NEGATIVE;
795 const bool nan_a = dcmask_a & DCMASK_NAN;
796 const bool nan_b = dcmask_b & DCMASK_NAN;
797
798 g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F);
799
800 if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) {
801 const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN;
802 const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN;
803
804 if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) {
805 s->float_exception_flags |= float_flag_invalid;
806 }
807 switch (type) {
808 case S390_MINMAX_TYPE_JAVA:
809 if (sig_a) {
810 return S390_MINMAX_RES_SILENCE_A;
811 } else if (sig_b) {
812 return S390_MINMAX_RES_SILENCE_B;
813 }
814 return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
815 case S390_MINMAX_TYPE_F:
816 return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
817 case S390_MINMAX_TYPE_C_MACRO:
818 s->float_exception_flags |= float_flag_invalid;
819 return S390_MINMAX_RES_B;
820 case S390_MINMAX_TYPE_CPP:
821 s->float_exception_flags |= float_flag_invalid;
822 return S390_MINMAX_RES_A;
823 default:
824 g_assert_not_reached();
825 }
826 } else if (unlikely((dcmask_a & DCMASK_ZERO) && (dcmask_b & DCMASK_ZERO))) {
827 switch (type) {
828 case S390_MINMAX_TYPE_JAVA:
829 return neg_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
830 case S390_MINMAX_TYPE_C_MACRO:
831 return S390_MINMAX_RES_B;
832 case S390_MINMAX_TYPE_F:
833 return !neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A;
834 case S390_MINMAX_TYPE_CPP:
835 return S390_MINMAX_RES_A;
836 default:
837 g_assert_not_reached();
838 }
839 }
840 return S390_MINMAX_RES_MINMAX;
841 }
842
vfmax_res(uint16_t dcmask_a,uint16_t dcmask_b,S390MinMaxType type,float_status * s)843 static S390MinMaxRes vfmax_res(uint16_t dcmask_a, uint16_t dcmask_b,
844 S390MinMaxType type, float_status *s)
845 {
846 g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F);
847
848 if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) {
849 const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN;
850 const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN;
851 const bool nan_a = dcmask_a & DCMASK_NAN;
852 const bool nan_b = dcmask_b & DCMASK_NAN;
853
854 if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) {
855 s->float_exception_flags |= float_flag_invalid;
856 }
857 switch (type) {
858 case S390_MINMAX_TYPE_JAVA:
859 if (sig_a) {
860 return S390_MINMAX_RES_SILENCE_A;
861 } else if (sig_b) {
862 return S390_MINMAX_RES_SILENCE_B;
863 }
864 return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
865 case S390_MINMAX_TYPE_F:
866 return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
867 case S390_MINMAX_TYPE_C_MACRO:
868 s->float_exception_flags |= float_flag_invalid;
869 return S390_MINMAX_RES_B;
870 case S390_MINMAX_TYPE_CPP:
871 s->float_exception_flags |= float_flag_invalid;
872 return S390_MINMAX_RES_A;
873 default:
874 g_assert_not_reached();
875 }
876 } else if (unlikely((dcmask_a & DCMASK_ZERO) && (dcmask_b & DCMASK_ZERO))) {
877 const bool neg_a = dcmask_a & DCMASK_NEGATIVE;
878
879 switch (type) {
880 case S390_MINMAX_TYPE_JAVA:
881 case S390_MINMAX_TYPE_F:
882 return neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A;
883 case S390_MINMAX_TYPE_C_MACRO:
884 return S390_MINMAX_RES_B;
885 case S390_MINMAX_TYPE_CPP:
886 return S390_MINMAX_RES_A;
887 default:
888 g_assert_not_reached();
889 }
890 }
891 return S390_MINMAX_RES_MINMAX;
892 }
893
vfminmax_res(uint16_t dcmask_a,uint16_t dcmask_b,S390MinMaxType type,bool is_min,float_status * s)894 static S390MinMaxRes vfminmax_res(uint16_t dcmask_a, uint16_t dcmask_b,
895 S390MinMaxType type, bool is_min,
896 float_status *s)
897 {
898 return is_min ? vfmin_res(dcmask_a, dcmask_b, type, s) :
899 vfmax_res(dcmask_a, dcmask_b, type, s);
900 }
901
/*
 * Per-element float32 minimum (@is_min) or maximum of @v2 and @v3, stored
 * into @v1.
 *
 * For S390_MINMAX_TYPE_IEEE the softfloat minnum/maxnum functions are used,
 * or minnummag/maxnummag when @is_abs is set. For all other types the
 * operands are first replaced by their absolute values if @is_abs is set,
 * then vfminmax_res() decides whether the result is computed with
 * float32_min()/float32_max() or taken from one of the operands.
 *
 * With @se set, only element 0 is processed. Processing also stops at the
 * first element for which check_ieee_exc() reports a VXC.
 */
static void vfminmax32(S390Vector *v1, const S390Vector *v2,
                       const S390Vector *v3, CPUS390XState *env,
                       S390MinMaxType type, bool is_min, bool is_abs, bool se,
                       uintptr_t retaddr)
{
    float_status *fpst = &env->fpu_status;
    S390Vector out = {};
    uint8_t exc_bits = 0;
    uint8_t vxc;
    int enr = 0;

    do {
        float32 a = s390_vec_read_float32(v2, enr);
        float32 b = s390_vec_read_float32(v3, enr);
        float32 result;

        if (type == S390_MINMAX_TYPE_IEEE) {
            if (is_abs) {
                result = is_min ? float32_minnummag(a, b, fpst) :
                                  float32_maxnummag(a, b, fpst);
            } else {
                result = is_min ? float32_minnum(a, b, fpst) :
                                  float32_maxnum(a, b, fpst);
            }
        } else {
            uint16_t class_a, class_b;

            if (is_abs) {
                a = float32_abs(a);
                b = float32_abs(b);
            }
            class_a = float32_dcmask(env, a);
            class_b = float32_dcmask(env, b);

            switch (vfminmax_res(class_a, class_b, type, is_min, fpst)) {
            case S390_MINMAX_RES_MINMAX:
                result = is_min ? float32_min(a, b, fpst) :
                                  float32_max(a, b, fpst);
                break;
            case S390_MINMAX_RES_A:
                result = a;
                break;
            case S390_MINMAX_RES_B:
                result = b;
                break;
            case S390_MINMAX_RES_SILENCE_A:
                result = float32_silence_nan(a, fpst);
                break;
            case S390_MINMAX_RES_SILENCE_B:
                result = float32_silence_nan(b, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        }

        s390_vec_write_float32(&out, enr, result);
        vxc = check_ieee_exc(env, enr, false, &exc_bits);
    } while (!se && !vxc && ++enr < 4);
    handle_ieee_exc(env, vxc, exc_bits, retaddr);
    *v1 = out;
}
963
/*
 * Per-element float64 minimum (@is_min) or maximum of @v2 and @v3, stored
 * into @v1.
 *
 * For S390_MINMAX_TYPE_IEEE the softfloat minnum/maxnum functions are used,
 * or minnummag/maxnummag when @is_abs is set. For all other types the
 * operands are first replaced by their absolute values if @is_abs is set,
 * then vfminmax_res() decides whether the result is computed with
 * float64_min()/float64_max() or taken from one of the operands.
 *
 * With @se set, only element 0 is processed. Processing also stops at the
 * first element for which check_ieee_exc() reports a VXC.
 */
static void vfminmax64(S390Vector *v1, const S390Vector *v2,
                       const S390Vector *v3, CPUS390XState *env,
                       S390MinMaxType type, bool is_min, bool is_abs, bool se,
                       uintptr_t retaddr)
{
    float_status *fpst = &env->fpu_status;
    S390Vector out = {};
    uint8_t exc_bits = 0;
    uint8_t vxc;
    int enr = 0;

    do {
        float64 a = s390_vec_read_float64(v2, enr);
        float64 b = s390_vec_read_float64(v3, enr);
        float64 result;

        if (type == S390_MINMAX_TYPE_IEEE) {
            if (is_abs) {
                result = is_min ? float64_minnummag(a, b, fpst) :
                                  float64_maxnummag(a, b, fpst);
            } else {
                result = is_min ? float64_minnum(a, b, fpst) :
                                  float64_maxnum(a, b, fpst);
            }
        } else {
            uint16_t class_a, class_b;

            if (is_abs) {
                a = float64_abs(a);
                b = float64_abs(b);
            }
            class_a = float64_dcmask(env, a);
            class_b = float64_dcmask(env, b);

            switch (vfminmax_res(class_a, class_b, type, is_min, fpst)) {
            case S390_MINMAX_RES_MINMAX:
                result = is_min ? float64_min(a, b, fpst) :
                                  float64_max(a, b, fpst);
                break;
            case S390_MINMAX_RES_A:
                result = a;
                break;
            case S390_MINMAX_RES_B:
                result = b;
                break;
            case S390_MINMAX_RES_SILENCE_A:
                result = float64_silence_nan(a, fpst);
                break;
            case S390_MINMAX_RES_SILENCE_B:
                result = float64_silence_nan(b, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        }

        s390_vec_write_float64(&out, enr, result);
        vxc = check_ieee_exc(env, enr, false, &exc_bits);
    } while (!se && !vxc && ++enr < 2);
    handle_ieee_exc(env, vxc, exc_bits, retaddr);
    *v1 = out;
}
1025
/*
 * Minimum/maximum of the single float128 value held in v2 and v3.
 *
 * For S390_MINMAX_TYPE_IEEE the softfloat minnum/maxnum functions are used
 * (the *nummag variants when is_abs is set). For every other type the
 * operands are first replaced by their absolute values when is_abs is set;
 * vfminmax_res() then selects, from the data classes of both operands,
 * whether to compute the min/max, to take one operand unchanged, or to take
 * a silenced copy of one operand.
 *
 * se is accepted for signature parity with the 32/64-bit variants but is
 * not read here, as there is only one element.
 *
 * v1 is written only after handle_ieee_exc() has returned.
 */
static void vfminmax128(S390Vector *v1, const S390Vector *v2,
                        const S390Vector *v3, CPUS390XState *env,
                        S390MinMaxType type, bool is_min, bool is_abs, bool se,
                        uintptr_t retaddr)
{
    float_status *fpst = &env->fpu_status;
    float128 lhs = s390_vec_read_float128(v2);
    float128 rhs = s390_vec_read_float128(v3);
    uint8_t vec_exc = 0;
    uint8_t vxc;
    float128 val;

    if (type == S390_MINMAX_TYPE_IEEE) {
        if (is_abs) {
            val = is_min ? float128_minnummag(lhs, rhs, fpst)
                         : float128_maxnummag(lhs, rhs, fpst);
        } else {
            val = is_min ? float128_minnum(lhs, rhs, fpst)
                         : float128_maxnum(lhs, rhs, fpst);
        }
    } else {
        if (is_abs) {
            lhs = float128_abs(lhs);
            rhs = float128_abs(rhs);
        }

        /* The operand data classes decide how the result is formed. */
        switch (vfminmax_res(float128_dcmask(env, lhs),
                             float128_dcmask(env, rhs),
                             type, is_min, fpst)) {
        case S390_MINMAX_RES_MINMAX:
            if (is_min) {
                val = float128_min(lhs, rhs, fpst);
            } else {
                val = float128_max(lhs, rhs, fpst);
            }
            break;
        case S390_MINMAX_RES_A:
            val = lhs;
            break;
        case S390_MINMAX_RES_B:
            val = rhs;
            break;
        case S390_MINMAX_RES_SILENCE_A:
            val = float128_silence_nan(lhs, fpst);
            break;
        case S390_MINMAX_RES_SILENCE_B:
            val = float128_silence_nan(rhs, fpst);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Element number is always 0 for the 128-bit format. */
    vxc = check_ieee_exc(env, 0, false, &vec_exc);
    handle_ieee_exc(env, vxc, vec_exc, retaddr);
    s390_vec_write_float128(v1, val);
}
1078
/*
 * Define the helper gvec_<NAME><BITS>, which decodes the descriptor data and
 * forwards to vfminmax<BITS>().
 *
 * Layout of simd_data(desc) as decoded here:
 *   bit 3     - se, passed through to vfminmax<BITS>()
 *   bits 4-7  - 4-bit function code; values >= 8 select the absolute-value
 *               variant of function code (value - 8)
 */
#define DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, BITS)                                \
void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
                               CPUS390XState *env, uint32_t desc)              \
{                                                                              \
    const bool se = extract32(simd_data(desc), 3, 1);                          \
    const uint8_t raw_type = extract32(simd_data(desc), 4, 4);                 \
    const bool is_abs = raw_type >= 8;                                         \
    const uint8_t type = is_abs ? raw_type - 8 : raw_type;                     \
                                                                               \
    vfminmax##BITS(v1, v2, v3, env, type, IS_MIN, is_abs, se, GETPC());        \
}
1094
1095 #define DEF_GVEC_VFMINMAX(NAME, IS_MIN) \
1096 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 32) \
1097 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 64) \
1098 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 128)
1099
1100 DEF_GVEC_VFMINMAX(vfmax, false)
1101 DEF_GVEC_VFMINMAX(vfmin, true)
1102