1 /* 2 * QEMU TCG support -- s390x vector floating point instruction support 3 * 4 * Copyright (C) 2019 Red Hat Inc 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 #include "qemu/osdep.h" 13 #include "qemu-common.h" 14 #include "cpu.h" 15 #include "internal.h" 16 #include "vec.h" 17 #include "tcg_s390x.h" 18 #include "tcg/tcg-gvec-desc.h" 19 #include "exec/exec-all.h" 20 #include "exec/helper-proto.h" 21 #include "fpu/softfloat.h" 22 23 #define VIC_INVALID 0x1 24 #define VIC_DIVBYZERO 0x2 25 #define VIC_OVERFLOW 0x3 26 #define VIC_UNDERFLOW 0x4 27 #define VIC_INEXACT 0x5 28 29 /* returns the VEX. If the VEX is 0, there is no trap */ 30 static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC, 31 uint8_t *vec_exc) 32 { 33 uint8_t vece_exc = 0, trap_exc; 34 unsigned qemu_exc; 35 36 /* Retrieve and clear the softfloat exceptions */ 37 qemu_exc = env->fpu_status.float_exception_flags; 38 if (qemu_exc == 0) { 39 return 0; 40 } 41 env->fpu_status.float_exception_flags = 0; 42 43 vece_exc = s390_softfloat_exc_to_ieee(qemu_exc); 44 45 /* Add them to the vector-wide s390x exception bits */ 46 *vec_exc |= vece_exc; 47 48 /* Check for traps and construct the VXC */ 49 trap_exc = vece_exc & env->fpc >> 24; 50 if (trap_exc) { 51 if (trap_exc & S390_IEEE_MASK_INVALID) { 52 return enr << 4 | VIC_INVALID; 53 } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) { 54 return enr << 4 | VIC_DIVBYZERO; 55 } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) { 56 return enr << 4 | VIC_OVERFLOW; 57 } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) { 58 return enr << 4 | VIC_UNDERFLOW; 59 } else if (!XxC) { 60 g_assert(trap_exc & S390_IEEE_MASK_INEXACT); 61 /* inexact has lowest priority on traps */ 62 return enr << 4 | VIC_INEXACT; 63 } 64 } 65 return 0; 66 } 67 68 static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc, 69 uintptr_t retaddr) 70 { 71 if (vxc) { 72 /* on traps, the fpc flags are not updated, instruction is suppressed */ 73 tcg_s390_vector_exception(env, vxc, retaddr); 74 } 75 if (vec_exc) { 76 /* indicate exceptions for all elements combined */ 77 env->fpc |= vec_exc << 16; 78 } 79 } 80 81 static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr) 82 { 83 return make_float64(s390_vec_read_element64(v, enr)); 84 } 85 86 static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data) 87 { 88 return s390_vec_write_element64(v, enr, data); 89 } 90 91 typedef uint64_t (*vop64_2_fn)(uint64_t a, float_status *s); 92 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 93 bool s, bool XxC, uint8_t erm, vop64_2_fn fn, 94 uintptr_t retaddr) 95 { 96 uint8_t vxc, vec_exc = 0; 97 S390Vector tmp = {}; 98 int i, old_mode; 99 100 old_mode = s390_swap_bfp_rounding_mode(env, erm); 101 for (i = 0; i < 2; i++) { 102 const uint64_t a = s390_vec_read_element64(v2, i); 103 104 s390_vec_write_element64(&tmp, i, fn(a, &env->fpu_status)); 105 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 106 if (s || vxc) { 107 break; 108 } 109 } 110 s390_restore_bfp_rounding_mode(env, old_mode); 111 handle_ieee_exc(env, vxc, vec_exc, retaddr); 112 *v1 = tmp; 113 } 114 115 typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s); 116 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 117 CPUS390XState *env, bool s, vop64_3_fn fn, 118 uintptr_t retaddr) 119 { 120 uint8_t vxc, vec_exc = 0; 121 S390Vector tmp = {}; 122 int i; 123 124 for (i = 0; i < 2; i++) { 125 const float64 a = s390_vec_read_float64(v2, i); 126 const float64 b = s390_vec_read_float64(v3, i); 127 128 s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status)); 129 vxc = check_ieee_exc(env, i, false, &vec_exc); 130 if (s || vxc) { 131 break; 132 } 133 } 134 handle_ieee_exc(env, vxc, vec_exc, retaddr); 135 *v1 = tmp; 136 } 137 138 #define DEF_GVEC_VOP3(NAME, OP) \ 139 void HELPER(gvec_##NAME##64)(void *v1, const void *v2, const void *v3, \ 140 CPUS390XState *env, uint32_t desc) \ 141 { \ 142 const bool se = extract32(simd_data(desc), 3, 1); \ 143 \ 144 vop64_3(v1, v2, v3, env, se, float64_##OP, GETPC()); \ 145 } 146 147 DEF_GVEC_VOP3(vfa, add) 148 DEF_GVEC_VOP3(vfs, sub) 149 DEF_GVEC_VOP3(vfd, div) 150 DEF_GVEC_VOP3(vfm, mul) 151 152 static int wfc64(const S390Vector *v1, const S390Vector *v2, 153 CPUS390XState *env, bool signal, uintptr_t retaddr) 154 { 155 /* only the zero-indexed elements are compared */ 156 const float64 a = s390_vec_read_element64(v1, 0); 157 const float64 b = s390_vec_read_element64(v2, 0); 158 uint8_t vxc, vec_exc = 0; 159 int cmp; 160 161 if (signal) { 162 cmp = float64_compare(a, b, &env->fpu_status); 163 } else { 164 cmp = float64_compare_quiet(a, b, &env->fpu_status); 165 } 166 vxc = check_ieee_exc(env, 0, false, &vec_exc); 167 handle_ieee_exc(env, vxc, vec_exc, retaddr); 168 169 return float_comp_to_cc(env, cmp); 170 } 171 172 void HELPER(gvec_wfc64)(const void *v1, const void *v2, CPUS390XState *env, 173 uint32_t desc) 174 { 175 env->cc_op = wfc64(v1, v2, env, false, GETPC()); 176 } 177 178 void HELPER(gvec_wfk64)(const void *v1, const void *v2, CPUS390XState *env, 179 uint32_t desc) 180 { 181 env->cc_op = wfc64(v1, v2, env, true, GETPC()); 182 } 183 184 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status); 185 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 186 CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr) 187 { 188 uint8_t vxc, vec_exc = 0; 189 S390Vector tmp = {}; 190 int match = 0; 191 int i; 192 193 for (i = 0; i < 2; i++) { 194 const float64 a = s390_vec_read_element64(v2, i); 195 const float64 b = s390_vec_read_element64(v3, i); 196 197 /* swap the order of the parameters, so we can use existing functions */ 198 if (fn(b, a, &env->fpu_status)) { 199 match++; 200 s390_vec_write_element64(&tmp, i, -1ull); 201 } 202 vxc = check_ieee_exc(env, i, false, &vec_exc); 203 if (s || vxc) { 204 break; 205 } 206 } 207 208 handle_ieee_exc(env, vxc, vec_exc, retaddr); 209 *v1 = tmp; 210 if (match) { 211 return s || match == 2 ? 0 : 1; 212 } 213 return 3; 214 } 215 216 void HELPER(gvec_vfce64)(void *v1, const void *v2, const void *v3, 217 CPUS390XState *env, uint32_t desc) 218 { 219 vfc64(v1, v2, v3, env, false, float64_eq_quiet, GETPC()); 220 } 221 222 void HELPER(gvec_vfce64s)(void *v1, const void *v2, const void *v3, 223 CPUS390XState *env, uint32_t desc) 224 { 225 vfc64(v1, v2, v3, env, true, float64_eq_quiet, GETPC()); 226 } 227 228 void HELPER(gvec_vfce64_cc)(void *v1, const void *v2, const void *v3, 229 CPUS390XState *env, uint32_t desc) 230 { 231 env->cc_op = vfc64(v1, v2, v3, env, false, float64_eq_quiet, GETPC()); 232 } 233 234 void HELPER(gvec_vfce64s_cc)(void *v1, const void *v2, const void *v3, 235 CPUS390XState *env, uint32_t desc) 236 { 237 env->cc_op = vfc64(v1, v2, v3, env, true, float64_eq_quiet, GETPC()); 238 } 239 240 void HELPER(gvec_vfch64)(void *v1, const void *v2, const void *v3, 241 CPUS390XState *env, uint32_t desc) 242 { 243 vfc64(v1, v2, v3, env, false, float64_lt_quiet, GETPC()); 244 } 245 246 void HELPER(gvec_vfch64s)(void *v1, const void *v2, const void *v3, 247 CPUS390XState *env, uint32_t desc) 248 { 249 vfc64(v1, v2, v3, env, true, float64_lt_quiet, GETPC()); 250 } 251 252 void HELPER(gvec_vfch64_cc)(void *v1, const void *v2, const void *v3, 253 CPUS390XState *env, uint32_t desc) 254 { 255 env->cc_op = vfc64(v1, v2, v3, env, false, float64_lt_quiet, GETPC()); 256 } 257 258 void HELPER(gvec_vfch64s_cc)(void *v1, const void *v2, const void *v3, 259 CPUS390XState *env, uint32_t desc) 260 { 261 env->cc_op = vfc64(v1, v2, v3, env, true, float64_lt_quiet, GETPC()); 262 } 263 264 void HELPER(gvec_vfche64)(void *v1, const void *v2, const void *v3, 265 CPUS390XState *env, uint32_t desc) 266 { 267 vfc64(v1, v2, v3, env, false, float64_le_quiet, GETPC()); 268 } 269 270 void HELPER(gvec_vfche64s)(void *v1, const void *v2, const void *v3, 271 CPUS390XState *env, uint32_t desc) 272 { 273 vfc64(v1, v2, v3, env, true, float64_le_quiet, GETPC()); 274 } 275 276 void HELPER(gvec_vfche64_cc)(void *v1, const void *v2, const void *v3, 277 CPUS390XState *env, uint32_t desc) 278 { 279 env->cc_op = vfc64(v1, v2, v3, env, false, float64_le_quiet, GETPC()); 280 } 281 282 void HELPER(gvec_vfche64s_cc)(void *v1, const void *v2, const void *v3, 283 CPUS390XState *env, uint32_t desc) 284 { 285 env->cc_op = vfc64(v1, v2, v3, env, true, float64_le_quiet, GETPC()); 286 } 287 288 static uint64_t vcdg64(uint64_t a, float_status *s) 289 { 290 return int64_to_float64(a, s); 291 } 292 293 void HELPER(gvec_vcdg64)(void *v1, const void *v2, CPUS390XState *env, 294 uint32_t desc) 295 { 296 const uint8_t erm = extract32(simd_data(desc), 4, 4); 297 const bool XxC = extract32(simd_data(desc), 2, 1); 298 299 vop64_2(v1, v2, env, false, XxC, erm, vcdg64, GETPC()); 300 } 301 302 void HELPER(gvec_vcdg64s)(void *v1, const void *v2, CPUS390XState *env, 303 uint32_t desc) 304 { 305 const uint8_t erm = extract32(simd_data(desc), 4, 4); 306 const bool XxC = extract32(simd_data(desc), 2, 1); 307 308 vop64_2(v1, v2, env, true, XxC, erm, vcdg64, GETPC()); 309 } 310 311 static uint64_t vcdlg64(uint64_t a, float_status *s) 312 { 313 return uint64_to_float64(a, s); 314 } 315 316 void HELPER(gvec_vcdlg64)(void *v1, const void *v2, CPUS390XState *env, 317 uint32_t desc) 318 { 319 const uint8_t erm = extract32(simd_data(desc), 4, 4); 320 const bool XxC = extract32(simd_data(desc), 2, 1); 321 322 vop64_2(v1, v2, env, false, XxC, erm, vcdlg64, GETPC()); 323 } 324 325 void HELPER(gvec_vcdlg64s)(void *v1, const void *v2, CPUS390XState *env, 326 uint32_t desc) 327 { 328 const uint8_t erm = extract32(simd_data(desc), 4, 4); 329 const bool XxC = extract32(simd_data(desc), 2, 1); 330 331 vop64_2(v1, v2, env, true, XxC, erm, vcdlg64, GETPC()); 332 } 333 334 static uint64_t vcgd64(uint64_t a, float_status *s) 335 { 336 const uint64_t tmp = float64_to_int64(a, s); 337 338 return float64_is_any_nan(a) ? INT64_MIN : tmp; 339 } 340 341 void HELPER(gvec_vcgd64)(void *v1, const void *v2, CPUS390XState *env, 342 uint32_t desc) 343 { 344 const uint8_t erm = extract32(simd_data(desc), 4, 4); 345 const bool XxC = extract32(simd_data(desc), 2, 1); 346 347 vop64_2(v1, v2, env, false, XxC, erm, vcgd64, GETPC()); 348 } 349 350 void HELPER(gvec_vcgd64s)(void *v1, const void *v2, CPUS390XState *env, 351 uint32_t desc) 352 { 353 const uint8_t erm = extract32(simd_data(desc), 4, 4); 354 const bool XxC = extract32(simd_data(desc), 2, 1); 355 356 vop64_2(v1, v2, env, true, XxC, erm, vcgd64, GETPC()); 357 } 358 359 static uint64_t vclgd64(uint64_t a, float_status *s) 360 { 361 const uint64_t tmp = float64_to_uint64(a, s); 362 363 return float64_is_any_nan(a) ? 0 : tmp; 364 } 365 366 void HELPER(gvec_vclgd64)(void *v1, const void *v2, CPUS390XState *env, 367 uint32_t desc) 368 { 369 const uint8_t erm = extract32(simd_data(desc), 4, 4); 370 const bool XxC = extract32(simd_data(desc), 2, 1); 371 372 vop64_2(v1, v2, env, false, XxC, erm, vclgd64, GETPC()); 373 } 374 375 void HELPER(gvec_vclgd64s)(void *v1, const void *v2, CPUS390XState *env, 376 uint32_t desc) 377 { 378 const uint8_t erm = extract32(simd_data(desc), 4, 4); 379 const bool XxC = extract32(simd_data(desc), 2, 1); 380 381 vop64_2(v1, v2, env, true, XxC, erm, vclgd64, GETPC()); 382 } 383 384 static uint64_t vfi64(uint64_t a, float_status *s) 385 { 386 return float64_round_to_int(a, s); 387 } 388 389 void HELPER(gvec_vfi64)(void *v1, const void *v2, CPUS390XState *env, 390 uint32_t desc) 391 { 392 const uint8_t erm = extract32(simd_data(desc), 4, 4); 393 const bool XxC = extract32(simd_data(desc), 2, 1); 394 395 vop64_2(v1, v2, env, false, XxC, erm, vfi64, GETPC()); 396 } 397 398 void HELPER(gvec_vfi64s)(void *v1, const void *v2, CPUS390XState *env, 399 uint32_t desc) 400 { 401 const uint8_t erm = extract32(simd_data(desc), 4, 4); 402 const bool XxC = extract32(simd_data(desc), 2, 1); 403 404 vop64_2(v1, v2, env, true, XxC, erm, vfi64, GETPC()); 405 } 406 407 static void vfll32(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 408 bool s, uintptr_t retaddr) 409 { 410 uint8_t vxc, vec_exc = 0; 411 S390Vector tmp = {}; 412 int i; 413 414 for (i = 0; i < 2; i++) { 415 /* load from even element */ 416 const float32 a = s390_vec_read_element32(v2, i * 2); 417 const uint64_t ret = float32_to_float64(a, &env->fpu_status); 418 419 s390_vec_write_element64(&tmp, i, ret); 420 /* indicate the source element */ 421 vxc = check_ieee_exc(env, i * 2, false, &vec_exc); 422 if (s || vxc) { 423 break; 424 } 425 } 426 handle_ieee_exc(env, vxc, vec_exc, retaddr); 427 *v1 = tmp; 428 } 429 430 void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env, 431 uint32_t desc) 432 { 433 vfll32(v1, v2, env, false, GETPC()); 434 } 435 436 void HELPER(gvec_vfll32s)(void *v1, const void *v2, CPUS390XState *env, 437 uint32_t desc) 438 { 439 vfll32(v1, v2, env, true, GETPC()); 440 } 441 442 static void vflr64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 443 bool s, bool XxC, uint8_t erm, uintptr_t retaddr) 444 { 445 uint8_t vxc, vec_exc = 0; 446 S390Vector tmp = {}; 447 int i, old_mode; 448 449 old_mode = s390_swap_bfp_rounding_mode(env, erm); 450 for (i = 0; i < 2; i++) { 451 float64 a = s390_vec_read_element64(v2, i); 452 uint32_t ret = float64_to_float32(a, &env->fpu_status); 453 454 /* place at even element */ 455 s390_vec_write_element32(&tmp, i * 2, ret); 456 /* indicate the source element */ 457 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 458 if (s || vxc) { 459 break; 460 } 461 } 462 s390_restore_bfp_rounding_mode(env, old_mode); 463 handle_ieee_exc(env, vxc, vec_exc, retaddr); 464 *v1 = tmp; 465 } 466 467 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env, 468 uint32_t desc) 469 { 470 const uint8_t erm = extract32(simd_data(desc), 4, 4); 471 const bool XxC = extract32(simd_data(desc), 2, 1); 472 473 vflr64(v1, v2, env, false, XxC, erm, GETPC()); 474 } 475 476 void HELPER(gvec_vflr64s)(void *v1, const void *v2, CPUS390XState *env, 477 uint32_t desc) 478 { 479 const uint8_t erm = extract32(simd_data(desc), 4, 4); 480 const bool XxC = extract32(simd_data(desc), 2, 1); 481 482 vflr64(v1, v2, env, true, XxC, erm, GETPC()); 483 } 484 485 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 486 const S390Vector *v4, CPUS390XState *env, bool s, int flags, 487 uintptr_t retaddr) 488 { 489 uint8_t vxc, vec_exc = 0; 490 S390Vector tmp = {}; 491 int i; 492 493 for (i = 0; i < 2; i++) { 494 const uint64_t a = s390_vec_read_element64(v2, i); 495 const uint64_t b = s390_vec_read_element64(v3, i); 496 const uint64_t c = s390_vec_read_element64(v4, i); 497 uint64_t ret = float64_muladd(a, b, c, flags, &env->fpu_status); 498 499 s390_vec_write_element64(&tmp, i, ret); 500 vxc = check_ieee_exc(env, i, false, &vec_exc); 501 if (s || vxc) { 502 break; 503 } 504 } 505 handle_ieee_exc(env, vxc, vec_exc, retaddr); 506 *v1 = tmp; 507 } 508 509 void HELPER(gvec_vfma64)(void *v1, const void *v2, const void *v3, 510 const void *v4, CPUS390XState *env, uint32_t desc) 511 { 512 vfma64(v1, v2, v3, v4, env, false, 0, GETPC()); 513 } 514 515 void HELPER(gvec_vfma64s)(void *v1, const void *v2, const void *v3, 516 const void *v4, CPUS390XState *env, uint32_t desc) 517 { 518 vfma64(v1, v2, v3, v4, env, true, 0, GETPC()); 519 } 520 521 void HELPER(gvec_vfms64)(void *v1, const void *v2, const void *v3, 522 const void *v4, CPUS390XState *env, uint32_t desc) 523 { 524 vfma64(v1, v2, v3, v4, env, false, float_muladd_negate_c, GETPC()); 525 } 526 527 void HELPER(gvec_vfms64s)(void *v1, const void *v2, const void *v3, 528 const void *v4, CPUS390XState *env, uint32_t desc) 529 { 530 vfma64(v1, v2, v3, v4, env, true, float_muladd_negate_c, GETPC()); 531 } 532 533 static uint64_t vfsq64(uint64_t a, float_status *s) 534 { 535 return float64_sqrt(a, s); 536 } 537 538 void HELPER(gvec_vfsq64)(void *v1, const void *v2, CPUS390XState *env, 539 uint32_t desc) 540 { 541 vop64_2(v1, v2, env, false, false, 0, vfsq64, GETPC()); 542 } 543 544 void HELPER(gvec_vfsq64s)(void *v1, const void *v2, CPUS390XState *env, 545 uint32_t desc) 546 { 547 vop64_2(v1, v2, env, true, false, 0, vfsq64, GETPC()); 548 } 549 550 static int vftci64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 551 bool s, uint16_t i3) 552 { 553 int i, match = 0; 554 555 for (i = 0; i < 2; i++) { 556 float64 a = s390_vec_read_element64(v2, i); 557 558 if (float64_dcmask(env, a) & i3) { 559 match++; 560 s390_vec_write_element64(v1, i, -1ull); 561 } else { 562 s390_vec_write_element64(v1, i, 0); 563 } 564 if (s) { 565 break; 566 } 567 } 568 569 if (match) { 570 return s || match == 2 ? 0 : 1; 571 } 572 return 3; 573 } 574 575 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env, 576 uint32_t desc) 577 { 578 env->cc_op = vftci64(v1, v2, env, false, simd_data(desc)); 579 } 580 581 void HELPER(gvec_vftci64s)(void *v1, const void *v2, CPUS390XState *env, 582 uint32_t desc) 583 { 584 env->cc_op = vftci64(v1, v2, env, true, simd_data(desc)); 585 } 586