1 /* 2 * QEMU TCG support -- s390x vector floating point instruction support 3 * 4 * Copyright (C) 2019 Red Hat Inc 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 #include "qemu/osdep.h" 13 #include "cpu.h" 14 #include "s390x-internal.h" 15 #include "vec.h" 16 #include "tcg_s390x.h" 17 #include "tcg/tcg-gvec-desc.h" 18 #include "exec/helper-proto.h" 19 #include "fpu/softfloat.h" 20 21 #define VIC_INVALID 0x1 22 #define VIC_DIVBYZERO 0x2 23 #define VIC_OVERFLOW 0x3 24 #define VIC_UNDERFLOW 0x4 25 #define VIC_INEXACT 0x5 26 27 /* returns the VEX. If the VEX is 0, there is no trap */ 28 static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC, 29 uint8_t *vec_exc) 30 { 31 uint8_t vece_exc = 0, trap_exc; 32 unsigned qemu_exc; 33 34 /* Retrieve and clear the softfloat exceptions */ 35 qemu_exc = env->fpu_status.float_exception_flags; 36 if (qemu_exc == 0) { 37 return 0; 38 } 39 env->fpu_status.float_exception_flags = 0; 40 41 vece_exc = s390_softfloat_exc_to_ieee(qemu_exc); 42 43 /* Add them to the vector-wide s390x exception bits */ 44 *vec_exc |= vece_exc; 45 46 /* Check for traps and construct the VXC */ 47 trap_exc = vece_exc & env->fpc >> 24; 48 if (trap_exc) { 49 if (trap_exc & S390_IEEE_MASK_INVALID) { 50 return enr << 4 | VIC_INVALID; 51 } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) { 52 return enr << 4 | VIC_DIVBYZERO; 53 } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) { 54 return enr << 4 | VIC_OVERFLOW; 55 } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) { 56 return enr << 4 | VIC_UNDERFLOW; 57 } else if (!XxC) { 58 g_assert(trap_exc & S390_IEEE_MASK_INEXACT); 59 /* inexact has lowest priority on traps */ 60 return enr << 4 | VIC_INEXACT; 61 } 62 } 63 return 0; 64 } 65 66 static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc, 67 uintptr_t retaddr) 68 { 69 if (vxc) { 70 /* on traps, the fpc flags are not updated, instruction is suppressed */ 71 tcg_s390_vector_exception(env, vxc, retaddr); 72 } 73 if (vec_exc) { 74 /* indicate exceptions for all elements combined */ 75 env->fpc |= vec_exc << 16; 76 } 77 } 78 79 static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr) 80 { 81 return make_float32(s390_vec_read_element32(v, enr)); 82 } 83 84 static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr) 85 { 86 return make_float64(s390_vec_read_element64(v, enr)); 87 } 88 89 static float128 s390_vec_read_float128(const S390Vector *v) 90 { 91 return make_float128(s390_vec_read_element64(v, 0), 92 s390_vec_read_element64(v, 1)); 93 } 94 95 static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data) 96 { 97 return s390_vec_write_element32(v, enr, data); 98 } 99 100 static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data) 101 { 102 return s390_vec_write_element64(v, enr, data); 103 } 104 105 static void s390_vec_write_float128(S390Vector *v, float128 data) 106 { 107 s390_vec_write_element64(v, 0, data.high); 108 s390_vec_write_element64(v, 1, data.low); 109 } 110 111 typedef float32 (*vop32_2_fn)(float32 a, float_status *s); 112 static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 113 bool s, bool XxC, uint8_t erm, vop32_2_fn fn, 114 uintptr_t retaddr) 115 { 116 uint8_t vxc, vec_exc = 0; 117 S390Vector tmp = {}; 118 int i, old_mode; 119 120 old_mode = s390_swap_bfp_rounding_mode(env, erm); 121 for (i = 0; i < 4; i++) { 122 const float32 a = s390_vec_read_float32(v2, i); 123 124 s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status)); 125 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 126 if (s || vxc) { 127 break; 128 } 129 } 130 s390_restore_bfp_rounding_mode(env, old_mode); 131 handle_ieee_exc(env, vxc, vec_exc, retaddr); 132 *v1 = tmp; 133 } 134 135 typedef float64 (*vop64_2_fn)(float64 a, float_status *s); 136 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 137 bool s, bool XxC, uint8_t erm, vop64_2_fn fn, 138 uintptr_t retaddr) 139 { 140 uint8_t vxc, vec_exc = 0; 141 S390Vector tmp = {}; 142 int i, old_mode; 143 144 old_mode = s390_swap_bfp_rounding_mode(env, erm); 145 for (i = 0; i < 2; i++) { 146 const float64 a = s390_vec_read_float64(v2, i); 147 148 s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status)); 149 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 150 if (s || vxc) { 151 break; 152 } 153 } 154 s390_restore_bfp_rounding_mode(env, old_mode); 155 handle_ieee_exc(env, vxc, vec_exc, retaddr); 156 *v1 = tmp; 157 } 158 159 typedef float128 (*vop128_2_fn)(float128 a, float_status *s); 160 static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 161 bool s, bool XxC, uint8_t erm, vop128_2_fn fn, 162 uintptr_t retaddr) 163 { 164 const float128 a = s390_vec_read_float128(v2); 165 uint8_t vxc, vec_exc = 0; 166 S390Vector tmp = {}; 167 int old_mode; 168 169 old_mode = s390_swap_bfp_rounding_mode(env, erm); 170 s390_vec_write_float128(&tmp, fn(a, &env->fpu_status)); 171 vxc = check_ieee_exc(env, 0, XxC, &vec_exc); 172 s390_restore_bfp_rounding_mode(env, old_mode); 173 handle_ieee_exc(env, vxc, vec_exc, retaddr); 174 *v1 = tmp; 175 } 176 177 static float32 vcdg32(float32 a, float_status *s) 178 { 179 return int32_to_float32(a, s); 180 } 181 182 static float32 vcdlg32(float32 a, float_status *s) 183 { 184 return uint32_to_float32(a, s); 185 } 186 187 static float32 vcgd32(float32 a, float_status *s) 188 { 189 const float32 tmp = float32_to_int32(a, s); 190 191 return float32_is_any_nan(a) ? INT32_MIN : tmp; 192 } 193 194 static float32 vclgd32(float32 a, float_status *s) 195 { 196 const float32 tmp = float32_to_uint32(a, s); 197 198 return float32_is_any_nan(a) ? 0 : tmp; 199 } 200 201 static float64 vcdg64(float64 a, float_status *s) 202 { 203 return int64_to_float64(a, s); 204 } 205 206 static float64 vcdlg64(float64 a, float_status *s) 207 { 208 return uint64_to_float64(a, s); 209 } 210 211 static float64 vcgd64(float64 a, float_status *s) 212 { 213 const float64 tmp = float64_to_int64(a, s); 214 215 return float64_is_any_nan(a) ? INT64_MIN : tmp; 216 } 217 218 static float64 vclgd64(float64 a, float_status *s) 219 { 220 const float64 tmp = float64_to_uint64(a, s); 221 222 return float64_is_any_nan(a) ? 0 : tmp; 223 } 224 225 #define DEF_GVEC_VOP2_FN(NAME, FN, BITS) \ 226 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env, \ 227 uint32_t desc) \ 228 { \ 229 const uint8_t erm = extract32(simd_data(desc), 4, 4); \ 230 const bool se = extract32(simd_data(desc), 3, 1); \ 231 const bool XxC = extract32(simd_data(desc), 2, 1); \ 232 \ 233 vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); \ 234 } 235 236 #define DEF_GVEC_VOP2_32(NAME) \ 237 DEF_GVEC_VOP2_FN(NAME, NAME##32, 32) 238 239 #define DEF_GVEC_VOP2_64(NAME) \ 240 DEF_GVEC_VOP2_FN(NAME, NAME##64, 64) 241 242 #define DEF_GVEC_VOP2(NAME, OP) \ 243 DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32) \ 244 DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64) \ 245 DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128) 246 247 DEF_GVEC_VOP2_32(vcdg) 248 DEF_GVEC_VOP2_32(vcdlg) 249 DEF_GVEC_VOP2_32(vcgd) 250 DEF_GVEC_VOP2_32(vclgd) 251 DEF_GVEC_VOP2_64(vcdg) 252 DEF_GVEC_VOP2_64(vcdlg) 253 DEF_GVEC_VOP2_64(vcgd) 254 DEF_GVEC_VOP2_64(vclgd) 255 DEF_GVEC_VOP2(vfi, round_to_int) 256 DEF_GVEC_VOP2(vfsq, sqrt) 257 258 typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s); 259 static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 260 CPUS390XState *env, bool s, vop32_3_fn fn, 261 uintptr_t retaddr) 262 { 263 uint8_t vxc, vec_exc = 0; 264 S390Vector tmp = {}; 265 int i; 266 267 for (i = 0; i < 4; i++) { 268 const float32 a = s390_vec_read_float32(v2, i); 269 const float32 b = s390_vec_read_float32(v3, i); 270 271 s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status)); 272 vxc = check_ieee_exc(env, i, false, &vec_exc); 273 if (s || vxc) { 274 break; 275 } 276 } 277 handle_ieee_exc(env, vxc, vec_exc, retaddr); 278 *v1 = tmp; 279 } 280 281 typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s); 282 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 283 CPUS390XState *env, bool s, vop64_3_fn fn, 284 uintptr_t retaddr) 285 { 286 uint8_t vxc, vec_exc = 0; 287 S390Vector tmp = {}; 288 int i; 289 290 for (i = 0; i < 2; i++) { 291 const float64 a = s390_vec_read_float64(v2, i); 292 const float64 b = s390_vec_read_float64(v3, i); 293 294 s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status)); 295 vxc = check_ieee_exc(env, i, false, &vec_exc); 296 if (s || vxc) { 297 break; 298 } 299 } 300 handle_ieee_exc(env, vxc, vec_exc, retaddr); 301 *v1 = tmp; 302 } 303 304 typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s); 305 static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 306 CPUS390XState *env, bool s, vop128_3_fn fn, 307 uintptr_t retaddr) 308 { 309 const float128 a = s390_vec_read_float128(v2); 310 const float128 b = s390_vec_read_float128(v3); 311 uint8_t vxc, vec_exc = 0; 312 S390Vector tmp = {}; 313 314 s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status)); 315 vxc = check_ieee_exc(env, 0, false, &vec_exc); 316 handle_ieee_exc(env, vxc, vec_exc, retaddr); 317 *v1 = tmp; 318 } 319 320 #define DEF_GVEC_VOP3_B(NAME, OP, BITS) \ 321 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 322 CPUS390XState *env, uint32_t desc) \ 323 { \ 324 const bool se = extract32(simd_data(desc), 3, 1); \ 325 \ 326 vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC()); \ 327 } 328 329 #define DEF_GVEC_VOP3(NAME, OP) \ 330 DEF_GVEC_VOP3_B(NAME, OP, 32) \ 331 DEF_GVEC_VOP3_B(NAME, OP, 64) \ 332 DEF_GVEC_VOP3_B(NAME, OP, 128) 333 334 DEF_GVEC_VOP3(vfa, add) 335 DEF_GVEC_VOP3(vfs, sub) 336 DEF_GVEC_VOP3(vfd, div) 337 DEF_GVEC_VOP3(vfm, mul) 338 339 static int wfc32(const S390Vector *v1, const S390Vector *v2, 340 CPUS390XState *env, bool signal, uintptr_t retaddr) 341 { 342 /* only the zero-indexed elements are compared */ 343 const float32 a = s390_vec_read_float32(v1, 0); 344 const float32 b = s390_vec_read_float32(v2, 0); 345 uint8_t vxc, vec_exc = 0; 346 int cmp; 347 348 if (signal) { 349 cmp = float32_compare(a, b, &env->fpu_status); 350 } else { 351 cmp = float32_compare_quiet(a, b, &env->fpu_status); 352 } 353 vxc = check_ieee_exc(env, 0, false, &vec_exc); 354 handle_ieee_exc(env, vxc, vec_exc, retaddr); 355 356 return float_comp_to_cc(env, cmp); 357 } 358 359 static int wfc64(const S390Vector *v1, const S390Vector *v2, 360 CPUS390XState *env, bool signal, uintptr_t retaddr) 361 { 362 /* only the zero-indexed elements are compared */ 363 const float64 a = s390_vec_read_float64(v1, 0); 364 const float64 b = s390_vec_read_float64(v2, 0); 365 uint8_t vxc, vec_exc = 0; 366 int cmp; 367 368 if (signal) { 369 cmp = float64_compare(a, b, &env->fpu_status); 370 } else { 371 cmp = float64_compare_quiet(a, b, &env->fpu_status); 372 } 373 vxc = check_ieee_exc(env, 0, false, &vec_exc); 374 handle_ieee_exc(env, vxc, vec_exc, retaddr); 375 376 return float_comp_to_cc(env, cmp); 377 } 378 379 static int wfc128(const S390Vector *v1, const S390Vector *v2, 380 CPUS390XState *env, bool signal, uintptr_t retaddr) 381 { 382 /* only the zero-indexed elements are compared */ 383 const float128 a = s390_vec_read_float128(v1); 384 const float128 b = s390_vec_read_float128(v2); 385 uint8_t vxc, vec_exc = 0; 386 int cmp; 387 388 if (signal) { 389 cmp = float128_compare(a, b, &env->fpu_status); 390 } else { 391 cmp = float128_compare_quiet(a, b, &env->fpu_status); 392 } 393 vxc = check_ieee_exc(env, 0, false, &vec_exc); 394 handle_ieee_exc(env, vxc, vec_exc, retaddr); 395 396 return float_comp_to_cc(env, cmp); 397 } 398 399 #define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \ 400 void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2, \ 401 CPUS390XState *env, uint32_t desc) \ 402 { \ 403 env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC()); \ 404 } 405 406 #define DEF_GVEC_WFC(NAME, SIGNAL) \ 407 DEF_GVEC_WFC_B(NAME, SIGNAL, 32) \ 408 DEF_GVEC_WFC_B(NAME, SIGNAL, 64) \ 409 DEF_GVEC_WFC_B(NAME, SIGNAL, 128) 410 411 DEF_GVEC_WFC(wfc, false) 412 DEF_GVEC_WFC(wfk, true) 413 414 typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status); 415 static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 416 CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr) 417 { 418 uint8_t vxc, vec_exc = 0; 419 S390Vector tmp = {}; 420 int match = 0; 421 int i; 422 423 for (i = 0; i < 4; i++) { 424 const float32 a = s390_vec_read_float32(v2, i); 425 const float32 b = s390_vec_read_float32(v3, i); 426 427 /* swap the order of the parameters, so we can use existing functions */ 428 if (fn(b, a, &env->fpu_status)) { 429 match++; 430 s390_vec_write_element32(&tmp, i, -1u); 431 } 432 vxc = check_ieee_exc(env, i, false, &vec_exc); 433 if (s || vxc) { 434 break; 435 } 436 } 437 438 handle_ieee_exc(env, vxc, vec_exc, retaddr); 439 *v1 = tmp; 440 if (match) { 441 return s || match == 4 ? 0 : 1; 442 } 443 return 3; 444 } 445 446 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status); 447 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 448 CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr) 449 { 450 uint8_t vxc, vec_exc = 0; 451 S390Vector tmp = {}; 452 int match = 0; 453 int i; 454 455 for (i = 0; i < 2; i++) { 456 const float64 a = s390_vec_read_float64(v2, i); 457 const float64 b = s390_vec_read_float64(v3, i); 458 459 /* swap the order of the parameters, so we can use existing functions */ 460 if (fn(b, a, &env->fpu_status)) { 461 match++; 462 s390_vec_write_element64(&tmp, i, -1ull); 463 } 464 vxc = check_ieee_exc(env, i, false, &vec_exc); 465 if (s || vxc) { 466 break; 467 } 468 } 469 470 handle_ieee_exc(env, vxc, vec_exc, retaddr); 471 *v1 = tmp; 472 if (match) { 473 return s || match == 2 ? 0 : 1; 474 } 475 return 3; 476 } 477 478 typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status); 479 static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 480 CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr) 481 { 482 const float128 a = s390_vec_read_float128(v2); 483 const float128 b = s390_vec_read_float128(v3); 484 uint8_t vxc, vec_exc = 0; 485 S390Vector tmp = {}; 486 bool match = false; 487 488 /* swap the order of the parameters, so we can use existing functions */ 489 if (fn(b, a, &env->fpu_status)) { 490 match = true; 491 s390_vec_write_element64(&tmp, 0, -1ull); 492 s390_vec_write_element64(&tmp, 1, -1ull); 493 } 494 vxc = check_ieee_exc(env, 0, false, &vec_exc); 495 handle_ieee_exc(env, vxc, vec_exc, retaddr); 496 *v1 = tmp; 497 return match ? 0 : 3; 498 } 499 500 #define DEF_GVEC_VFC_B(NAME, OP, BITS) \ 501 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 502 CPUS390XState *env, uint32_t desc) \ 503 { \ 504 const bool se = extract32(simd_data(desc), 3, 1); \ 505 const bool sq = extract32(simd_data(desc), 2, 1); \ 506 vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \ 507 \ 508 vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ 509 } \ 510 \ 511 void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3, \ 512 CPUS390XState *env, uint32_t desc) \ 513 { \ 514 const bool se = extract32(simd_data(desc), 3, 1); \ 515 const bool sq = extract32(simd_data(desc), 2, 1); \ 516 vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \ 517 \ 518 env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ 519 } 520 521 #define DEF_GVEC_VFC(NAME, OP) \ 522 DEF_GVEC_VFC_B(NAME, OP, 32) \ 523 DEF_GVEC_VFC_B(NAME, OP, 64) \ 524 DEF_GVEC_VFC_B(NAME, OP, 128) \ 525 526 DEF_GVEC_VFC(vfce, eq) 527 DEF_GVEC_VFC(vfch, lt) 528 DEF_GVEC_VFC(vfche, le) 529 530 void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env, 531 uint32_t desc) 532 { 533 const bool s = extract32(simd_data(desc), 3, 1); 534 uint8_t vxc, vec_exc = 0; 535 S390Vector tmp = {}; 536 int i; 537 538 for (i = 0; i < 2; i++) { 539 /* load from even element */ 540 const float32 a = s390_vec_read_element32(v2, i * 2); 541 const uint64_t ret = float32_to_float64(a, &env->fpu_status); 542 543 s390_vec_write_element64(&tmp, i, ret); 544 /* indicate the source element */ 545 vxc = check_ieee_exc(env, i * 2, false, &vec_exc); 546 if (s || vxc) { 547 break; 548 } 549 } 550 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 551 *(S390Vector *)v1 = tmp; 552 } 553 554 void HELPER(gvec_vfll64)(void *v1, const void *v2, CPUS390XState *env, 555 uint32_t desc) 556 { 557 /* load from even element */ 558 const float128 ret = float64_to_float128(s390_vec_read_float64(v2, 0), 559 &env->fpu_status); 560 uint8_t vxc, vec_exc = 0; 561 562 vxc = check_ieee_exc(env, 0, false, &vec_exc); 563 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 564 s390_vec_write_float128(v1, ret); 565 } 566 567 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env, 568 uint32_t desc) 569 { 570 const uint8_t erm = extract32(simd_data(desc), 4, 4); 571 const bool s = extract32(simd_data(desc), 3, 1); 572 const bool XxC = extract32(simd_data(desc), 2, 1); 573 uint8_t vxc, vec_exc = 0; 574 S390Vector tmp = {}; 575 int i, old_mode; 576 577 old_mode = s390_swap_bfp_rounding_mode(env, erm); 578 for (i = 0; i < 2; i++) { 579 float64 a = s390_vec_read_element64(v2, i); 580 uint32_t ret = float64_to_float32(a, &env->fpu_status); 581 582 /* place at even element */ 583 s390_vec_write_element32(&tmp, i * 2, ret); 584 /* indicate the source element */ 585 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 586 if (s || vxc) { 587 break; 588 } 589 } 590 s390_restore_bfp_rounding_mode(env, old_mode); 591 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 592 *(S390Vector *)v1 = tmp; 593 } 594 595 void HELPER(gvec_vflr128)(void *v1, const void *v2, CPUS390XState *env, 596 uint32_t desc) 597 { 598 const uint8_t erm = extract32(simd_data(desc), 4, 4); 599 const bool XxC = extract32(simd_data(desc), 2, 1); 600 uint8_t vxc, vec_exc = 0; 601 int old_mode; 602 float64 ret; 603 604 old_mode = s390_swap_bfp_rounding_mode(env, erm); 605 ret = float128_to_float64(s390_vec_read_float128(v2), &env->fpu_status); 606 vxc = check_ieee_exc(env, 0, XxC, &vec_exc); 607 s390_restore_bfp_rounding_mode(env, old_mode); 608 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 609 610 /* place at even element, odd element is unpredictable */ 611 s390_vec_write_float64(v1, 0, ret); 612 } 613 614 static void vfma32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 615 const S390Vector *v4, CPUS390XState *env, bool s, int flags, 616 uintptr_t retaddr) 617 { 618 uint8_t vxc, vec_exc = 0; 619 S390Vector tmp = {}; 620 int i; 621 622 for (i = 0; i < 4; i++) { 623 const float32 a = s390_vec_read_float32(v3, i); 624 const float32 b = s390_vec_read_float32(v2, i); 625 const float32 c = s390_vec_read_float32(v4, i); 626 float32 ret = float32_muladd(a, b, c, flags, &env->fpu_status); 627 628 s390_vec_write_float32(&tmp, i, ret); 629 vxc = check_ieee_exc(env, i, false, &vec_exc); 630 if (s || vxc) { 631 break; 632 } 633 } 634 handle_ieee_exc(env, vxc, vec_exc, retaddr); 635 *v1 = tmp; 636 } 637 638 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 639 const S390Vector *v4, CPUS390XState *env, bool s, int flags, 640 uintptr_t retaddr) 641 { 642 uint8_t vxc, vec_exc = 0; 643 S390Vector tmp = {}; 644 int i; 645 646 for (i = 0; i < 2; i++) { 647 const float64 a = s390_vec_read_float64(v3, i); 648 const float64 b = s390_vec_read_float64(v2, i); 649 const float64 c = s390_vec_read_float64(v4, i); 650 const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status); 651 652 s390_vec_write_float64(&tmp, i, ret); 653 vxc = check_ieee_exc(env, i, false, &vec_exc); 654 if (s || vxc) { 655 break; 656 } 657 } 658 handle_ieee_exc(env, vxc, vec_exc, retaddr); 659 *v1 = tmp; 660 } 661 662 static void vfma128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 663 const S390Vector *v4, CPUS390XState *env, bool s, int flags, 664 uintptr_t retaddr) 665 { 666 const float128 a = s390_vec_read_float128(v3); 667 const float128 b = s390_vec_read_float128(v2); 668 const float128 c = s390_vec_read_float128(v4); 669 uint8_t vxc, vec_exc = 0; 670 float128 ret; 671 672 ret = float128_muladd(a, b, c, flags, &env->fpu_status); 673 vxc = check_ieee_exc(env, 0, false, &vec_exc); 674 handle_ieee_exc(env, vxc, vec_exc, retaddr); 675 s390_vec_write_float128(v1, ret); 676 } 677 678 #define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS) \ 679 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 680 const void *v4, CPUS390XState *env, \ 681 uint32_t desc) \ 682 { \ 683 const bool se = extract32(simd_data(desc), 3, 1); \ 684 \ 685 vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC()); \ 686 } 687 688 #define DEF_GVEC_VFMA(NAME, FLAGS) \ 689 DEF_GVEC_VFMA_B(NAME, FLAGS, 32) \ 690 DEF_GVEC_VFMA_B(NAME, FLAGS, 64) \ 691 DEF_GVEC_VFMA_B(NAME, FLAGS, 128) 692 693 DEF_GVEC_VFMA(vfma, 0) 694 DEF_GVEC_VFMA(vfms, float_muladd_negate_c) 695 DEF_GVEC_VFMA(vfnma, float_muladd_negate_result) 696 DEF_GVEC_VFMA(vfnms, float_muladd_negate_c | float_muladd_negate_result) 697 698 void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env, 699 uint32_t desc) 700 { 701 uint16_t i3 = extract32(simd_data(desc), 4, 12); 702 bool s = extract32(simd_data(desc), 3, 1); 703 int i, match = 0; 704 705 for (i = 0; i < 4; i++) { 706 float32 a = s390_vec_read_float32(v2, i); 707 708 if (float32_dcmask(env, a) & i3) { 709 match++; 710 s390_vec_write_element32(v1, i, -1u); 711 } else { 712 s390_vec_write_element32(v1, i, 0); 713 } 714 if (s) { 715 break; 716 } 717 } 718 719 if (match == 4 || (s && match)) { 720 env->cc_op = 0; 721 } else if (match) { 722 env->cc_op = 1; 723 } else { 724 env->cc_op = 3; 725 } 726 } 727 728 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env, 729 uint32_t desc) 730 { 731 const uint16_t i3 = extract32(simd_data(desc), 4, 12); 732 const bool s = extract32(simd_data(desc), 3, 1); 733 int i, match = 0; 734 735 for (i = 0; i < 2; i++) { 736 const float64 a = s390_vec_read_float64(v2, i); 737 738 if (float64_dcmask(env, a) & i3) { 739 match++; 740 s390_vec_write_element64(v1, i, -1ull); 741 } else { 742 s390_vec_write_element64(v1, i, 0); 743 } 744 if (s) { 745 break; 746 } 747 } 748 749 if (match == 2 || (s && match)) { 750 env->cc_op = 0; 751 } else if (match) { 752 env->cc_op = 1; 753 } else { 754 env->cc_op = 3; 755 } 756 } 757 758 void HELPER(gvec_vftci128)(void *v1, const void *v2, CPUS390XState *env, 759 uint32_t desc) 760 { 761 const float128 a = s390_vec_read_float128(v2); 762 uint16_t i3 = extract32(simd_data(desc), 4, 12); 763 764 if (float128_dcmask(env, a) & i3) { 765 env->cc_op = 0; 766 s390_vec_write_element64(v1, 0, -1ull); 767 s390_vec_write_element64(v1, 1, -1ull); 768 } else { 769 env->cc_op = 3; 770 s390_vec_write_element64(v1, 0, 0); 771 s390_vec_write_element64(v1, 1, 0); 772 } 773 } 774 775 typedef enum S390MinMaxType { 776 S390_MINMAX_TYPE_IEEE = 0, 777 S390_MINMAX_TYPE_JAVA, 778 S390_MINMAX_TYPE_C_MACRO, 779 S390_MINMAX_TYPE_CPP, 780 S390_MINMAX_TYPE_F, 781 } S390MinMaxType; 782 783 typedef enum S390MinMaxRes { 784 S390_MINMAX_RES_MINMAX = 0, 785 S390_MINMAX_RES_A, 786 S390_MINMAX_RES_B, 787 S390_MINMAX_RES_SILENCE_A, 788 S390_MINMAX_RES_SILENCE_B, 789 } S390MinMaxRes; 790 791 static S390MinMaxRes vfmin_res(uint16_t dcmask_a, uint16_t dcmask_b, 792 S390MinMaxType type, float_status *s) 793 { 794 const bool neg_a = dcmask_a & DCMASK_NEGATIVE; 795 const bool nan_a = dcmask_a & DCMASK_NAN; 796 const bool nan_b = dcmask_b & DCMASK_NAN; 797 798 g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F); 799 800 if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) { 801 const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN; 802 const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN; 803 804 if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) { 805 s->float_exception_flags |= float_flag_invalid; 806 } 807 switch (type) { 808 case S390_MINMAX_TYPE_JAVA: 809 if (sig_a) { 810 return S390_MINMAX_RES_SILENCE_A; 811 } else if (sig_b) { 812 return S390_MINMAX_RES_SILENCE_B; 813 } 814 return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 815 case S390_MINMAX_TYPE_F: 816 return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 817 case S390_MINMAX_TYPE_C_MACRO: 818 s->float_exception_flags |= float_flag_invalid; 819 return S390_MINMAX_RES_B; 820 case S390_MINMAX_TYPE_CPP: 821 s->float_exception_flags |= float_flag_invalid; 822 return S390_MINMAX_RES_A; 823 default: 824 g_assert_not_reached(); 825 } 826 } else if (unlikely((dcmask_a & DCMASK_ZERO) && (dcmask_b & DCMASK_ZERO))) { 827 switch (type) { 828 case S390_MINMAX_TYPE_JAVA: 829 return neg_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 830 case S390_MINMAX_TYPE_C_MACRO: 831 return S390_MINMAX_RES_B; 832 case S390_MINMAX_TYPE_F: 833 return !neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A; 834 case S390_MINMAX_TYPE_CPP: 835 return S390_MINMAX_RES_A; 836 default: 837 g_assert_not_reached(); 838 } 839 } 840 return S390_MINMAX_RES_MINMAX; 841 } 842 843 static S390MinMaxRes vfmax_res(uint16_t dcmask_a, uint16_t dcmask_b, 844 S390MinMaxType type, float_status *s) 845 { 846 g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F); 847 848 if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) { 849 const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN; 850 const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN; 851 const bool nan_a = dcmask_a & DCMASK_NAN; 852 const bool nan_b = dcmask_b & DCMASK_NAN; 853 854 if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) { 855 s->float_exception_flags |= float_flag_invalid; 856 } 857 switch (type) { 858 case S390_MINMAX_TYPE_JAVA: 859 if (sig_a) { 860 return S390_MINMAX_RES_SILENCE_A; 861 } else if (sig_b) { 862 return S390_MINMAX_RES_SILENCE_B; 863 } 864 return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 865 case S390_MINMAX_TYPE_F: 866 return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; 867 case S390_MINMAX_TYPE_C_MACRO: 868 s->float_exception_flags |= float_flag_invalid; 869 return S390_MINMAX_RES_B; 870 case S390_MINMAX_TYPE_CPP: 871 s->float_exception_flags |= float_flag_invalid; 872 return S390_MINMAX_RES_A; 873 default: 874 g_assert_not_reached(); 875 } 876 } else if (unlikely((dcmask_a & DCMASK_ZERO) && (dcmask_b & DCMASK_ZERO))) { 877 const bool neg_a = dcmask_a & DCMASK_NEGATIVE; 878 879 switch (type) { 880 case S390_MINMAX_TYPE_JAVA: 881 case S390_MINMAX_TYPE_F: 882 return neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A; 883 case S390_MINMAX_TYPE_C_MACRO: 884 return S390_MINMAX_RES_B; 885 case S390_MINMAX_TYPE_CPP: 886 return S390_MINMAX_RES_A; 887 default: 888 g_assert_not_reached(); 889 } 890 } 891 return S390_MINMAX_RES_MINMAX; 892 } 893 894 static S390MinMaxRes vfminmax_res(uint16_t dcmask_a, uint16_t dcmask_b, 895 S390MinMaxType type, bool is_min, 896 float_status *s) 897 { 898 return is_min ? vfmin_res(dcmask_a, dcmask_b, type, s) : 899 vfmax_res(dcmask_a, dcmask_b, type, s); 900 } 901 902 static void vfminmax32(S390Vector *v1, const S390Vector *v2, 903 const S390Vector *v3, CPUS390XState *env, 904 S390MinMaxType type, bool is_min, bool is_abs, bool se, 905 uintptr_t retaddr) 906 { 907 float_status *s = &env->fpu_status; 908 uint8_t vxc, vec_exc = 0; 909 S390Vector tmp = {}; 910 int i; 911 912 for (i = 0; i < 4; i++) { 913 float32 a = s390_vec_read_float32(v2, i); 914 float32 b = s390_vec_read_float32(v3, i); 915 float32 result; 916 917 if (type != S390_MINMAX_TYPE_IEEE) { 918 S390MinMaxRes res; 919 920 if (is_abs) { 921 a = float32_abs(a); 922 b = float32_abs(b); 923 } 924 925 res = vfminmax_res(float32_dcmask(env, a), float32_dcmask(env, b), 926 type, is_min, s); 927 switch (res) { 928 case S390_MINMAX_RES_MINMAX: 929 result = is_min ? float32_min(a, b, s) : float32_max(a, b, s); 930 break; 931 case S390_MINMAX_RES_A: 932 result = a; 933 break; 934 case S390_MINMAX_RES_B: 935 result = b; 936 break; 937 case S390_MINMAX_RES_SILENCE_A: 938 result = float32_silence_nan(a, s); 939 break; 940 case S390_MINMAX_RES_SILENCE_B: 941 result = float32_silence_nan(b, s); 942 break; 943 default: 944 g_assert_not_reached(); 945 } 946 } else if (!is_abs) { 947 result = is_min ? float32_minnum(a, b, &env->fpu_status) : 948 float32_maxnum(a, b, &env->fpu_status); 949 } else { 950 result = is_min ? float32_minnummag(a, b, &env->fpu_status) : 951 float32_maxnummag(a, b, &env->fpu_status); 952 } 953 954 s390_vec_write_float32(&tmp, i, result); 955 vxc = check_ieee_exc(env, i, false, &vec_exc); 956 if (se || vxc) { 957 break; 958 } 959 } 960 handle_ieee_exc(env, vxc, vec_exc, retaddr); 961 *v1 = tmp; 962 } 963 964 static void vfminmax64(S390Vector *v1, const S390Vector *v2, 965 const S390Vector *v3, CPUS390XState *env, 966 S390MinMaxType type, bool is_min, bool is_abs, bool se, 967 uintptr_t retaddr) 968 { 969 float_status *s = &env->fpu_status; 970 uint8_t vxc, vec_exc = 0; 971 S390Vector tmp = {}; 972 int i; 973 974 for (i = 0; i < 2; i++) { 975 float64 a = s390_vec_read_float64(v2, i); 976 float64 b = s390_vec_read_float64(v3, i); 977 float64 result; 978 979 if (type != S390_MINMAX_TYPE_IEEE) { 980 S390MinMaxRes res; 981 982 if (is_abs) { 983 a = float64_abs(a); 984 b = float64_abs(b); 985 } 986 987 res = vfminmax_res(float64_dcmask(env, a), float64_dcmask(env, b), 988 type, is_min, s); 989 switch (res) { 990 case S390_MINMAX_RES_MINMAX: 991 result = is_min ? float64_min(a, b, s) : float64_max(a, b, s); 992 break; 993 case S390_MINMAX_RES_A: 994 result = a; 995 break; 996 case S390_MINMAX_RES_B: 997 result = b; 998 break; 999 case S390_MINMAX_RES_SILENCE_A: 1000 result = float64_silence_nan(a, s); 1001 break; 1002 case S390_MINMAX_RES_SILENCE_B: 1003 result = float64_silence_nan(b, s); 1004 break; 1005 default: 1006 g_assert_not_reached(); 1007 } 1008 } else if (!is_abs) { 1009 result = is_min ? float64_minnum(a, b, &env->fpu_status) : 1010 float64_maxnum(a, b, &env->fpu_status); 1011 } else { 1012 result = is_min ? float64_minnummag(a, b, &env->fpu_status) : 1013 float64_maxnummag(a, b, &env->fpu_status); 1014 } 1015 1016 s390_vec_write_float64(&tmp, i, result); 1017 vxc = check_ieee_exc(env, i, false, &vec_exc); 1018 if (se || vxc) { 1019 break; 1020 } 1021 } 1022 handle_ieee_exc(env, vxc, vec_exc, retaddr); 1023 *v1 = tmp; 1024 } 1025 1026 static void vfminmax128(S390Vector *v1, const S390Vector *v2, 1027 const S390Vector *v3, CPUS390XState *env, 1028 S390MinMaxType type, bool is_min, bool is_abs, bool se, 1029 uintptr_t retaddr) 1030 { 1031 float128 a = s390_vec_read_float128(v2); 1032 float128 b = s390_vec_read_float128(v3); 1033 float_status *s = &env->fpu_status; 1034 uint8_t vxc, vec_exc = 0; 1035 float128 result; 1036 1037 if (type != S390_MINMAX_TYPE_IEEE) { 1038 S390MinMaxRes res; 1039 1040 if (is_abs) { 1041 a = float128_abs(a); 1042 b = float128_abs(b); 1043 } 1044 1045 res = vfminmax_res(float128_dcmask(env, a), float128_dcmask(env, b), 1046 type, is_min, s); 1047 switch (res) { 1048 case S390_MINMAX_RES_MINMAX: 1049 result = is_min ? float128_min(a, b, s) : float128_max(a, b, s); 1050 break; 1051 case S390_MINMAX_RES_A: 1052 result = a; 1053 break; 1054 case S390_MINMAX_RES_B: 1055 result = b; 1056 break; 1057 case S390_MINMAX_RES_SILENCE_A: 1058 result = float128_silence_nan(a, s); 1059 break; 1060 case S390_MINMAX_RES_SILENCE_B: 1061 result = float128_silence_nan(b, s); 1062 break; 1063 default: 1064 g_assert_not_reached(); 1065 } 1066 } else if (!is_abs) { 1067 result = is_min ? float128_minnum(a, b, &env->fpu_status) : 1068 float128_maxnum(a, b, &env->fpu_status); 1069 } else { 1070 result = is_min ? float128_minnummag(a, b, &env->fpu_status) : 1071 float128_maxnummag(a, b, &env->fpu_status); 1072 } 1073 1074 vxc = check_ieee_exc(env, 0, false, &vec_exc); 1075 handle_ieee_exc(env, vxc, vec_exc, retaddr); 1076 s390_vec_write_float128(v1, result); 1077 } 1078 1079 #define DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, BITS) \ 1080 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 1081 CPUS390XState *env, uint32_t desc) \ 1082 { \ 1083 const bool se = extract32(simd_data(desc), 3, 1); \ 1084 uint8_t type = extract32(simd_data(desc), 4, 4); \ 1085 bool is_abs = false; \ 1086 \ 1087 if (type >= 8) { \ 1088 is_abs = true; \ 1089 type -= 8; \ 1090 } \ 1091 \ 1092 vfminmax##BITS(v1, v2, v3, env, type, IS_MIN, is_abs, se, GETPC()); \ 1093 } 1094 1095 #define DEF_GVEC_VFMINMAX(NAME, IS_MIN) \ 1096 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 32) \ 1097 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 64) \ 1098 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 128) 1099 1100 DEF_GVEC_VFMINMAX(vfmax, false) 1101 DEF_GVEC_VFMINMAX(vfmin, true) 1102