1 /* 2 * QEMU TCG support -- s390x vector floating point instruction support 3 * 4 * Copyright (C) 2019 Red Hat Inc 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 #include "qemu/osdep.h" 13 #include "qemu-common.h" 14 #include "cpu.h" 15 #include "internal.h" 16 #include "vec.h" 17 #include "tcg_s390x.h" 18 #include "tcg/tcg-gvec-desc.h" 19 #include "exec/exec-all.h" 20 #include "exec/helper-proto.h" 21 #include "fpu/softfloat.h" 22 23 #define VIC_INVALID 0x1 24 #define VIC_DIVBYZERO 0x2 25 #define VIC_OVERFLOW 0x3 26 #define VIC_UNDERFLOW 0x4 27 #define VIC_INEXACT 0x5 28 29 /* returns the VEX. If the VEX is 0, there is no trap */ 30 static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC, 31 uint8_t *vec_exc) 32 { 33 uint8_t vece_exc = 0, trap_exc; 34 unsigned qemu_exc; 35 36 /* Retrieve and clear the softfloat exceptions */ 37 qemu_exc = env->fpu_status.float_exception_flags; 38 if (qemu_exc == 0) { 39 return 0; 40 } 41 env->fpu_status.float_exception_flags = 0; 42 43 vece_exc = s390_softfloat_exc_to_ieee(qemu_exc); 44 45 /* Add them to the vector-wide s390x exception bits */ 46 *vec_exc |= vece_exc; 47 48 /* Check for traps and construct the VXC */ 49 trap_exc = vece_exc & env->fpc >> 24; 50 if (trap_exc) { 51 if (trap_exc & S390_IEEE_MASK_INVALID) { 52 return enr << 4 | VIC_INVALID; 53 } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) { 54 return enr << 4 | VIC_DIVBYZERO; 55 } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) { 56 return enr << 4 | VIC_OVERFLOW; 57 } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) { 58 return enr << 4 | VIC_UNDERFLOW; 59 } else if (!XxC) { 60 g_assert(trap_exc & S390_IEEE_MASK_INEXACT); 61 /* inexact has lowest priority on traps */ 62 return enr << 4 | VIC_INEXACT; 63 } 64 } 65 return 0; 66 } 67 68 static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc, 69 uintptr_t retaddr) 70 { 71 if (vxc) { 72 /* on traps, the fpc flags are not updated, instruction is suppressed */ 73 tcg_s390_vector_exception(env, vxc, retaddr); 74 } 75 if (vec_exc) { 76 /* indicate exceptions for all elements combined */ 77 env->fpc |= vec_exc << 16; 78 } 79 } 80 81 typedef uint64_t (*vop64_2_fn)(uint64_t a, float_status *s); 82 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 83 bool s, bool XxC, uint8_t erm, vop64_2_fn fn, 84 uintptr_t retaddr) 85 { 86 uint8_t vxc, vec_exc = 0; 87 S390Vector tmp = {}; 88 int i, old_mode; 89 90 old_mode = s390_swap_bfp_rounding_mode(env, erm); 91 for (i = 0; i < 2; i++) { 92 const uint64_t a = s390_vec_read_element64(v2, i); 93 94 s390_vec_write_element64(&tmp, i, fn(a, &env->fpu_status)); 95 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 96 if (s || vxc) { 97 break; 98 } 99 } 100 s390_restore_bfp_rounding_mode(env, old_mode); 101 handle_ieee_exc(env, vxc, vec_exc, retaddr); 102 *v1 = tmp; 103 } 104 105 typedef uint64_t (*vop64_3_fn)(uint64_t a, uint64_t b, float_status *s); 106 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 107 CPUS390XState *env, bool s, vop64_3_fn fn, 108 uintptr_t retaddr) 109 { 110 uint8_t vxc, vec_exc = 0; 111 S390Vector tmp = {}; 112 int i; 113 114 for (i = 0; i < 2; i++) { 115 const uint64_t a = s390_vec_read_element64(v2, i); 116 const uint64_t b = s390_vec_read_element64(v3, i); 117 118 s390_vec_write_element64(&tmp, i, fn(a, b, &env->fpu_status)); 119 vxc = check_ieee_exc(env, i, false, &vec_exc); 120 if (s || vxc) { 121 break; 122 } 123 } 124 handle_ieee_exc(env, vxc, vec_exc, retaddr); 125 *v1 = tmp; 126 } 127 128 static uint64_t vfa64(uint64_t a, uint64_t b, float_status *s) 129 { 130 return float64_add(a, b, s); 131 } 132 133 void HELPER(gvec_vfa64)(void *v1, const void *v2, const void *v3, 134 CPUS390XState *env, uint32_t desc) 135 { 136 vop64_3(v1, v2, v3, env, false, vfa64, GETPC()); 137 } 138 139 void HELPER(gvec_vfa64s)(void *v1, const void *v2, const void *v3, 140 CPUS390XState *env, uint32_t desc) 141 { 142 vop64_3(v1, v2, v3, env, true, vfa64, GETPC()); 143 } 144 145 static int wfc64(const S390Vector *v1, const S390Vector *v2, 146 CPUS390XState *env, bool signal, uintptr_t retaddr) 147 { 148 /* only the zero-indexed elements are compared */ 149 const float64 a = s390_vec_read_element64(v1, 0); 150 const float64 b = s390_vec_read_element64(v2, 0); 151 uint8_t vxc, vec_exc = 0; 152 int cmp; 153 154 if (signal) { 155 cmp = float64_compare(a, b, &env->fpu_status); 156 } else { 157 cmp = float64_compare_quiet(a, b, &env->fpu_status); 158 } 159 vxc = check_ieee_exc(env, 0, false, &vec_exc); 160 handle_ieee_exc(env, vxc, vec_exc, retaddr); 161 162 return float_comp_to_cc(env, cmp); 163 } 164 165 void HELPER(gvec_wfc64)(const void *v1, const void *v2, CPUS390XState *env, 166 uint32_t desc) 167 { 168 env->cc_op = wfc64(v1, v2, env, false, GETPC()); 169 } 170 171 void HELPER(gvec_wfk64)(const void *v1, const void *v2, CPUS390XState *env, 172 uint32_t desc) 173 { 174 env->cc_op = wfc64(v1, v2, env, true, GETPC()); 175 } 176 177 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status); 178 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 179 CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr) 180 { 181 uint8_t vxc, vec_exc = 0; 182 S390Vector tmp = {}; 183 int match = 0; 184 int i; 185 186 for (i = 0; i < 2; i++) { 187 const float64 a = s390_vec_read_element64(v2, i); 188 const float64 b = s390_vec_read_element64(v3, i); 189 190 /* swap the order of the parameters, so we can use existing functions */ 191 if (fn(b, a, &env->fpu_status)) { 192 match++; 193 s390_vec_write_element64(&tmp, i, -1ull); 194 } 195 vxc = check_ieee_exc(env, i, false, &vec_exc); 196 if (s || vxc) { 197 break; 198 } 199 } 200 201 handle_ieee_exc(env, vxc, vec_exc, retaddr); 202 *v1 = tmp; 203 if (match) { 204 return s || match == 2 ? 0 : 1; 205 } 206 return 3; 207 } 208 209 void HELPER(gvec_vfce64)(void *v1, const void *v2, const void *v3, 210 CPUS390XState *env, uint32_t desc) 211 { 212 vfc64(v1, v2, v3, env, false, float64_eq_quiet, GETPC()); 213 } 214 215 void HELPER(gvec_vfce64s)(void *v1, const void *v2, const void *v3, 216 CPUS390XState *env, uint32_t desc) 217 { 218 vfc64(v1, v2, v3, env, true, float64_eq_quiet, GETPC()); 219 } 220 221 void HELPER(gvec_vfce64_cc)(void *v1, const void *v2, const void *v3, 222 CPUS390XState *env, uint32_t desc) 223 { 224 env->cc_op = vfc64(v1, v2, v3, env, false, float64_eq_quiet, GETPC()); 225 } 226 227 void HELPER(gvec_vfce64s_cc)(void *v1, const void *v2, const void *v3, 228 CPUS390XState *env, uint32_t desc) 229 { 230 env->cc_op = vfc64(v1, v2, v3, env, true, float64_eq_quiet, GETPC()); 231 } 232 233 void HELPER(gvec_vfch64)(void *v1, const void *v2, const void *v3, 234 CPUS390XState *env, uint32_t desc) 235 { 236 vfc64(v1, v2, v3, env, false, float64_lt_quiet, GETPC()); 237 } 238 239 void HELPER(gvec_vfch64s)(void *v1, const void *v2, const void *v3, 240 CPUS390XState *env, uint32_t desc) 241 { 242 vfc64(v1, v2, v3, env, true, float64_lt_quiet, GETPC()); 243 } 244 245 void HELPER(gvec_vfch64_cc)(void *v1, const void *v2, const void *v3, 246 CPUS390XState *env, uint32_t desc) 247 { 248 env->cc_op = vfc64(v1, v2, v3, env, false, float64_lt_quiet, GETPC()); 249 } 250 251 void HELPER(gvec_vfch64s_cc)(void *v1, const void *v2, const void *v3, 252 CPUS390XState *env, uint32_t desc) 253 { 254 env->cc_op = vfc64(v1, v2, v3, env, true, float64_lt_quiet, GETPC()); 255 } 256 257 void HELPER(gvec_vfche64)(void *v1, const void *v2, const void *v3, 258 CPUS390XState *env, uint32_t desc) 259 { 260 vfc64(v1, v2, v3, env, false, float64_le_quiet, GETPC()); 261 } 262 263 void HELPER(gvec_vfche64s)(void *v1, const void *v2, const void *v3, 264 CPUS390XState *env, uint32_t desc) 265 { 266 vfc64(v1, v2, v3, env, true, float64_le_quiet, GETPC()); 267 } 268 269 void HELPER(gvec_vfche64_cc)(void *v1, const void *v2, const void *v3, 270 CPUS390XState *env, uint32_t desc) 271 { 272 env->cc_op = vfc64(v1, v2, v3, env, false, float64_le_quiet, GETPC()); 273 } 274 275 void HELPER(gvec_vfche64s_cc)(void *v1, const void *v2, const void *v3, 276 CPUS390XState *env, uint32_t desc) 277 { 278 env->cc_op = vfc64(v1, v2, v3, env, true, float64_le_quiet, GETPC()); 279 } 280 281 static uint64_t vcdg64(uint64_t a, float_status *s) 282 { 283 return int64_to_float64(a, s); 284 } 285 286 void HELPER(gvec_vcdg64)(void *v1, const void *v2, CPUS390XState *env, 287 uint32_t desc) 288 { 289 const uint8_t erm = extract32(simd_data(desc), 4, 4); 290 const bool XxC = extract32(simd_data(desc), 2, 1); 291 292 vop64_2(v1, v2, env, false, XxC, erm, vcdg64, GETPC()); 293 } 294 295 void HELPER(gvec_vcdg64s)(void *v1, const void *v2, CPUS390XState *env, 296 uint32_t desc) 297 { 298 const uint8_t erm = extract32(simd_data(desc), 4, 4); 299 const bool XxC = extract32(simd_data(desc), 2, 1); 300 301 vop64_2(v1, v2, env, true, XxC, erm, vcdg64, GETPC()); 302 } 303 304 static uint64_t vcdlg64(uint64_t a, float_status *s) 305 { 306 return uint64_to_float64(a, s); 307 } 308 309 void HELPER(gvec_vcdlg64)(void *v1, const void *v2, CPUS390XState *env, 310 uint32_t desc) 311 { 312 const uint8_t erm = extract32(simd_data(desc), 4, 4); 313 const bool XxC = extract32(simd_data(desc), 2, 1); 314 315 vop64_2(v1, v2, env, false, XxC, erm, vcdlg64, GETPC()); 316 } 317 318 void HELPER(gvec_vcdlg64s)(void *v1, const void *v2, CPUS390XState *env, 319 uint32_t desc) 320 { 321 const uint8_t erm = extract32(simd_data(desc), 4, 4); 322 const bool XxC = extract32(simd_data(desc), 2, 1); 323 324 vop64_2(v1, v2, env, true, XxC, erm, vcdlg64, GETPC()); 325 } 326 327 static uint64_t vcgd64(uint64_t a, float_status *s) 328 { 329 const uint64_t tmp = float64_to_int64(a, s); 330 331 return float64_is_any_nan(a) ? INT64_MIN : tmp; 332 } 333 334 void HELPER(gvec_vcgd64)(void *v1, const void *v2, CPUS390XState *env, 335 uint32_t desc) 336 { 337 const uint8_t erm = extract32(simd_data(desc), 4, 4); 338 const bool XxC = extract32(simd_data(desc), 2, 1); 339 340 vop64_2(v1, v2, env, false, XxC, erm, vcgd64, GETPC()); 341 } 342 343 void HELPER(gvec_vcgd64s)(void *v1, const void *v2, CPUS390XState *env, 344 uint32_t desc) 345 { 346 const uint8_t erm = extract32(simd_data(desc), 4, 4); 347 const bool XxC = extract32(simd_data(desc), 2, 1); 348 349 vop64_2(v1, v2, env, true, XxC, erm, vcgd64, GETPC()); 350 } 351 352 static uint64_t vclgd64(uint64_t a, float_status *s) 353 { 354 const uint64_t tmp = float64_to_uint64(a, s); 355 356 return float64_is_any_nan(a) ? 0 : tmp; 357 } 358 359 void HELPER(gvec_vclgd64)(void *v1, const void *v2, CPUS390XState *env, 360 uint32_t desc) 361 { 362 const uint8_t erm = extract32(simd_data(desc), 4, 4); 363 const bool XxC = extract32(simd_data(desc), 2, 1); 364 365 vop64_2(v1, v2, env, false, XxC, erm, vclgd64, GETPC()); 366 } 367 368 void HELPER(gvec_vclgd64s)(void *v1, const void *v2, CPUS390XState *env, 369 uint32_t desc) 370 { 371 const uint8_t erm = extract32(simd_data(desc), 4, 4); 372 const bool XxC = extract32(simd_data(desc), 2, 1); 373 374 vop64_2(v1, v2, env, true, XxC, erm, vclgd64, GETPC()); 375 } 376 377 static uint64_t vfd64(uint64_t a, uint64_t b, float_status *s) 378 { 379 return float64_div(a, b, s); 380 } 381 382 void HELPER(gvec_vfd64)(void *v1, const void *v2, const void *v3, 383 CPUS390XState *env, uint32_t desc) 384 { 385 vop64_3(v1, v2, v3, env, false, vfd64, GETPC()); 386 } 387 388 void HELPER(gvec_vfd64s)(void *v1, const void *v2, const void *v3, 389 CPUS390XState *env, uint32_t desc) 390 { 391 vop64_3(v1, v2, v3, env, true, vfd64, GETPC()); 392 } 393 394 static uint64_t vfi64(uint64_t a, float_status *s) 395 { 396 return float64_round_to_int(a, s); 397 } 398 399 void HELPER(gvec_vfi64)(void *v1, const void *v2, CPUS390XState *env, 400 uint32_t desc) 401 { 402 const uint8_t erm = extract32(simd_data(desc), 4, 4); 403 const bool XxC = extract32(simd_data(desc), 2, 1); 404 405 vop64_2(v1, v2, env, false, XxC, erm, vfi64, GETPC()); 406 } 407 408 void HELPER(gvec_vfi64s)(void *v1, const void *v2, CPUS390XState *env, 409 uint32_t desc) 410 { 411 const uint8_t erm = extract32(simd_data(desc), 4, 4); 412 const bool XxC = extract32(simd_data(desc), 2, 1); 413 414 vop64_2(v1, v2, env, true, XxC, erm, vfi64, GETPC()); 415 } 416 417 static void vfll32(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 418 bool s, uintptr_t retaddr) 419 { 420 uint8_t vxc, vec_exc = 0; 421 S390Vector tmp = {}; 422 int i; 423 424 for (i = 0; i < 2; i++) { 425 /* load from even element */ 426 const float32 a = s390_vec_read_element32(v2, i * 2); 427 const uint64_t ret = float32_to_float64(a, &env->fpu_status); 428 429 s390_vec_write_element64(&tmp, i, ret); 430 /* indicate the source element */ 431 vxc = check_ieee_exc(env, i * 2, false, &vec_exc); 432 if (s || vxc) { 433 break; 434 } 435 } 436 handle_ieee_exc(env, vxc, vec_exc, retaddr); 437 *v1 = tmp; 438 } 439 440 void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env, 441 uint32_t desc) 442 { 443 vfll32(v1, v2, env, false, GETPC()); 444 } 445 446 void HELPER(gvec_vfll32s)(void *v1, const void *v2, CPUS390XState *env, 447 uint32_t desc) 448 { 449 vfll32(v1, v2, env, true, GETPC()); 450 } 451 452 static void vflr64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 453 bool s, bool XxC, uint8_t erm, uintptr_t retaddr) 454 { 455 uint8_t vxc, vec_exc = 0; 456 S390Vector tmp = {}; 457 int i, old_mode; 458 459 old_mode = s390_swap_bfp_rounding_mode(env, erm); 460 for (i = 0; i < 2; i++) { 461 float64 a = s390_vec_read_element64(v2, i); 462 uint32_t ret = float64_to_float32(a, &env->fpu_status); 463 464 /* place at even element */ 465 s390_vec_write_element32(&tmp, i * 2, ret); 466 /* indicate the source element */ 467 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 468 if (s || vxc) { 469 break; 470 } 471 } 472 s390_restore_bfp_rounding_mode(env, old_mode); 473 handle_ieee_exc(env, vxc, vec_exc, retaddr); 474 *v1 = tmp; 475 } 476 477 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env, 478 uint32_t desc) 479 { 480 const uint8_t erm = extract32(simd_data(desc), 4, 4); 481 const bool XxC = extract32(simd_data(desc), 2, 1); 482 483 vflr64(v1, v2, env, false, XxC, erm, GETPC()); 484 } 485 486 void HELPER(gvec_vflr64s)(void *v1, const void *v2, CPUS390XState *env, 487 uint32_t desc) 488 { 489 const uint8_t erm = extract32(simd_data(desc), 4, 4); 490 const bool XxC = extract32(simd_data(desc), 2, 1); 491 492 vflr64(v1, v2, env, true, XxC, erm, GETPC()); 493 } 494 495 static uint64_t vfm64(uint64_t a, uint64_t b, float_status *s) 496 { 497 return float64_mul(a, b, s); 498 } 499 500 void HELPER(gvec_vfm64)(void *v1, const void *v2, const void *v3, 501 CPUS390XState *env, uint32_t desc) 502 { 503 vop64_3(v1, v2, v3, env, false, vfm64, GETPC()); 504 } 505 506 void HELPER(gvec_vfm64s)(void *v1, const void *v2, const void *v3, 507 CPUS390XState *env, uint32_t desc) 508 { 509 vop64_3(v1, v2, v3, env, true, vfm64, GETPC()); 510 } 511 512 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 513 const S390Vector *v4, CPUS390XState *env, bool s, int flags, 514 uintptr_t retaddr) 515 { 516 uint8_t vxc, vec_exc = 0; 517 S390Vector tmp = {}; 518 int i; 519 520 for (i = 0; i < 2; i++) { 521 const uint64_t a = s390_vec_read_element64(v2, i); 522 const uint64_t b = s390_vec_read_element64(v3, i); 523 const uint64_t c = s390_vec_read_element64(v4, i); 524 uint64_t ret = float64_muladd(a, b, c, flags, &env->fpu_status); 525 526 s390_vec_write_element64(&tmp, i, ret); 527 vxc = check_ieee_exc(env, i, false, &vec_exc); 528 if (s || vxc) { 529 break; 530 } 531 } 532 handle_ieee_exc(env, vxc, vec_exc, retaddr); 533 *v1 = tmp; 534 } 535 536 void HELPER(gvec_vfma64)(void *v1, const void *v2, const void *v3, 537 const void *v4, CPUS390XState *env, uint32_t desc) 538 { 539 vfma64(v1, v2, v3, v4, env, false, 0, GETPC()); 540 } 541 542 void HELPER(gvec_vfma64s)(void *v1, const void *v2, const void *v3, 543 const void *v4, CPUS390XState *env, uint32_t desc) 544 { 545 vfma64(v1, v2, v3, v4, env, true, 0, GETPC()); 546 } 547 548 void HELPER(gvec_vfms64)(void *v1, const void *v2, const void *v3, 549 const void *v4, CPUS390XState *env, uint32_t desc) 550 { 551 vfma64(v1, v2, v3, v4, env, false, float_muladd_negate_c, GETPC()); 552 } 553 554 void HELPER(gvec_vfms64s)(void *v1, const void *v2, const void *v3, 555 const void *v4, CPUS390XState *env, uint32_t desc) 556 { 557 vfma64(v1, v2, v3, v4, env, true, float_muladd_negate_c, GETPC()); 558 } 559 560 static uint64_t vfsq64(uint64_t a, float_status *s) 561 { 562 return float64_sqrt(a, s); 563 } 564 565 void HELPER(gvec_vfsq64)(void *v1, const void *v2, CPUS390XState *env, 566 uint32_t desc) 567 { 568 vop64_2(v1, v2, env, false, false, 0, vfsq64, GETPC()); 569 } 570 571 void HELPER(gvec_vfsq64s)(void *v1, const void *v2, CPUS390XState *env, 572 uint32_t desc) 573 { 574 vop64_2(v1, v2, env, true, false, 0, vfsq64, GETPC()); 575 } 576 577 static uint64_t vfs64(uint64_t a, uint64_t b, float_status *s) 578 { 579 return float64_sub(a, b, s); 580 } 581 582 void HELPER(gvec_vfs64)(void *v1, const void *v2, const void *v3, 583 CPUS390XState *env, uint32_t desc) 584 { 585 vop64_3(v1, v2, v3, env, false, vfs64, GETPC()); 586 } 587 588 void HELPER(gvec_vfs64s)(void *v1, const void *v2, const void *v3, 589 CPUS390XState *env, uint32_t desc) 590 { 591 vop64_3(v1, v2, v3, env, true, vfs64, GETPC()); 592 } 593 594 static int vftci64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 595 bool s, uint16_t i3) 596 { 597 int i, match = 0; 598 599 for (i = 0; i < 2; i++) { 600 float64 a = s390_vec_read_element64(v2, i); 601 602 if (float64_dcmask(env, a) & i3) { 603 match++; 604 s390_vec_write_element64(v1, i, -1ull); 605 } else { 606 s390_vec_write_element64(v1, i, 0); 607 } 608 if (s) { 609 break; 610 } 611 } 612 613 if (match) { 614 return s || match == 2 ? 0 : 1; 615 } 616 return 3; 617 } 618 619 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env, 620 uint32_t desc) 621 { 622 env->cc_op = vftci64(v1, v2, env, false, simd_data(desc)); 623 } 624 625 void HELPER(gvec_vftci64s)(void *v1, const void *v2, CPUS390XState *env, 626 uint32_t desc) 627 { 628 env->cc_op = vftci64(v1, v2, env, true, simd_data(desc)); 629 } 630