1 /* 2 * QEMU TCG support -- s390x vector floating point instruction support 3 * 4 * Copyright (C) 2019 Red Hat Inc 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 #include "qemu/osdep.h" 13 #include "qemu-common.h" 14 #include "cpu.h" 15 #include "internal.h" 16 #include "vec.h" 17 #include "tcg_s390x.h" 18 #include "tcg/tcg-gvec-desc.h" 19 #include "exec/exec-all.h" 20 #include "exec/helper-proto.h" 21 #include "fpu/softfloat.h" 22 23 #define VIC_INVALID 0x1 24 #define VIC_DIVBYZERO 0x2 25 #define VIC_OVERFLOW 0x3 26 #define VIC_UNDERFLOW 0x4 27 #define VIC_INEXACT 0x5 28 29 /* returns the VEX. If the VEX is 0, there is no trap */ 30 static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC, 31 uint8_t *vec_exc) 32 { 33 uint8_t vece_exc = 0, trap_exc; 34 unsigned qemu_exc; 35 36 /* Retrieve and clear the softfloat exceptions */ 37 qemu_exc = env->fpu_status.float_exception_flags; 38 if (qemu_exc == 0) { 39 return 0; 40 } 41 env->fpu_status.float_exception_flags = 0; 42 43 vece_exc = s390_softfloat_exc_to_ieee(qemu_exc); 44 45 /* Add them to the vector-wide s390x exception bits */ 46 *vec_exc |= vece_exc; 47 48 /* Check for traps and construct the VXC */ 49 trap_exc = vece_exc & env->fpc >> 24; 50 if (trap_exc) { 51 if (trap_exc & S390_IEEE_MASK_INVALID) { 52 return enr << 4 | VIC_INVALID; 53 } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) { 54 return enr << 4 | VIC_DIVBYZERO; 55 } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) { 56 return enr << 4 | VIC_OVERFLOW; 57 } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) { 58 return enr << 4 | VIC_UNDERFLOW; 59 } else if (!XxC) { 60 g_assert(trap_exc & S390_IEEE_MASK_INEXACT); 61 /* inexact has lowest priority on traps */ 62 return enr << 4 | VIC_INEXACT; 63 } 64 } 65 return 0; 66 } 67 68 static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc, 69 uintptr_t retaddr) 70 { 71 if (vxc) { 72 /* on traps, the fpc flags are not updated, instruction is suppressed */ 73 tcg_s390_vector_exception(env, vxc, retaddr); 74 } 75 if (vec_exc) { 76 /* indicate exceptions for all elements combined */ 77 env->fpc |= vec_exc << 16; 78 } 79 } 80 81 static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr) 82 { 83 return make_float32(s390_vec_read_element32(v, enr)); 84 } 85 86 static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr) 87 { 88 return make_float64(s390_vec_read_element64(v, enr)); 89 } 90 91 static float128 s390_vec_read_float128(const S390Vector *v) 92 { 93 return make_float128(s390_vec_read_element64(v, 0), 94 s390_vec_read_element64(v, 1)); 95 } 96 97 static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data) 98 { 99 return s390_vec_write_element32(v, enr, data); 100 } 101 102 static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data) 103 { 104 return s390_vec_write_element64(v, enr, data); 105 } 106 107 static void s390_vec_write_float128(S390Vector *v, float128 data) 108 { 109 s390_vec_write_element64(v, 0, data.high); 110 s390_vec_write_element64(v, 1, data.low); 111 } 112 113 typedef float64 (*vop64_2_fn)(float64 a, float_status *s); 114 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, 115 bool s, bool XxC, uint8_t erm, vop64_2_fn fn, 116 uintptr_t retaddr) 117 { 118 uint8_t vxc, vec_exc = 0; 119 S390Vector tmp = {}; 120 int i, old_mode; 121 122 old_mode = s390_swap_bfp_rounding_mode(env, erm); 123 for (i = 0; i < 2; i++) { 124 const float64 a = s390_vec_read_float64(v2, i); 125 126 s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status)); 127 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 128 if (s || vxc) { 129 break; 130 } 131 } 132 s390_restore_bfp_rounding_mode(env, old_mode); 133 handle_ieee_exc(env, vxc, vec_exc, retaddr); 134 *v1 = tmp; 135 } 136 137 static float64 vcdg64(float64 a, float_status *s) 138 { 139 return int64_to_float64(a, s); 140 } 141 142 static float64 vcdlg64(float64 a, float_status *s) 143 { 144 return uint64_to_float64(a, s); 145 } 146 147 static float64 vcgd64(float64 a, float_status *s) 148 { 149 const float64 tmp = float64_to_int64(a, s); 150 151 return float64_is_any_nan(a) ? INT64_MIN : tmp; 152 } 153 154 static float64 vclgd64(float64 a, float_status *s) 155 { 156 const float64 tmp = float64_to_uint64(a, s); 157 158 return float64_is_any_nan(a) ? 0 : tmp; 159 } 160 161 #define DEF_GVEC_VOP2_FN(NAME, FN, BITS) \ 162 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env, \ 163 uint32_t desc) \ 164 { \ 165 const uint8_t erm = extract32(simd_data(desc), 4, 4); \ 166 const bool se = extract32(simd_data(desc), 3, 1); \ 167 const bool XxC = extract32(simd_data(desc), 2, 1); \ 168 \ 169 vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); \ 170 } 171 172 #define DEF_GVEC_VOP2_64(NAME) \ 173 DEF_GVEC_VOP2_FN(NAME, NAME##64, 64) 174 175 #define DEF_GVEC_VOP2(NAME, OP) \ 176 DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64) 177 178 DEF_GVEC_VOP2_64(vcdg) 179 DEF_GVEC_VOP2_64(vcdlg) 180 DEF_GVEC_VOP2_64(vcgd) 181 DEF_GVEC_VOP2_64(vclgd) 182 DEF_GVEC_VOP2(vfi, round_to_int) 183 DEF_GVEC_VOP2(vfsq, sqrt) 184 185 typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s); 186 static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 187 CPUS390XState *env, bool s, vop32_3_fn fn, 188 uintptr_t retaddr) 189 { 190 uint8_t vxc, vec_exc = 0; 191 S390Vector tmp = {}; 192 int i; 193 194 for (i = 0; i < 4; i++) { 195 const float32 a = s390_vec_read_float32(v2, i); 196 const float32 b = s390_vec_read_float32(v3, i); 197 198 s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status)); 199 vxc = check_ieee_exc(env, i, false, &vec_exc); 200 if (s || vxc) { 201 break; 202 } 203 } 204 handle_ieee_exc(env, vxc, vec_exc, retaddr); 205 *v1 = tmp; 206 } 207 208 typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s); 209 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 210 CPUS390XState *env, bool s, vop64_3_fn fn, 211 uintptr_t retaddr) 212 { 213 uint8_t vxc, vec_exc = 0; 214 S390Vector tmp = {}; 215 int i; 216 217 for (i = 0; i < 2; i++) { 218 const float64 a = s390_vec_read_float64(v2, i); 219 const float64 b = s390_vec_read_float64(v3, i); 220 221 s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status)); 222 vxc = check_ieee_exc(env, i, false, &vec_exc); 223 if (s || vxc) { 224 break; 225 } 226 } 227 handle_ieee_exc(env, vxc, vec_exc, retaddr); 228 *v1 = tmp; 229 } 230 231 typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s); 232 static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 233 CPUS390XState *env, bool s, vop128_3_fn fn, 234 uintptr_t retaddr) 235 { 236 const float128 a = s390_vec_read_float128(v2); 237 const float128 b = s390_vec_read_float128(v3); 238 uint8_t vxc, vec_exc = 0; 239 S390Vector tmp = {}; 240 241 s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status)); 242 vxc = check_ieee_exc(env, 0, false, &vec_exc); 243 handle_ieee_exc(env, vxc, vec_exc, retaddr); 244 *v1 = tmp; 245 } 246 247 #define DEF_GVEC_VOP3_B(NAME, OP, BITS) \ 248 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 249 CPUS390XState *env, uint32_t desc) \ 250 { \ 251 const bool se = extract32(simd_data(desc), 3, 1); \ 252 \ 253 vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC()); \ 254 } 255 256 #define DEF_GVEC_VOP3(NAME, OP) \ 257 DEF_GVEC_VOP3_B(NAME, OP, 32) \ 258 DEF_GVEC_VOP3_B(NAME, OP, 64) \ 259 DEF_GVEC_VOP3_B(NAME, OP, 128) 260 261 DEF_GVEC_VOP3(vfa, add) 262 DEF_GVEC_VOP3(vfs, sub) 263 DEF_GVEC_VOP3(vfd, div) 264 DEF_GVEC_VOP3(vfm, mul) 265 266 static int wfc64(const S390Vector *v1, const S390Vector *v2, 267 CPUS390XState *env, bool signal, uintptr_t retaddr) 268 { 269 /* only the zero-indexed elements are compared */ 270 const float64 a = s390_vec_read_float64(v1, 0); 271 const float64 b = s390_vec_read_float64(v2, 0); 272 uint8_t vxc, vec_exc = 0; 273 int cmp; 274 275 if (signal) { 276 cmp = float64_compare(a, b, &env->fpu_status); 277 } else { 278 cmp = float64_compare_quiet(a, b, &env->fpu_status); 279 } 280 vxc = check_ieee_exc(env, 0, false, &vec_exc); 281 handle_ieee_exc(env, vxc, vec_exc, retaddr); 282 283 return float_comp_to_cc(env, cmp); 284 } 285 286 #define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \ 287 void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2, \ 288 CPUS390XState *env, uint32_t desc) \ 289 { \ 290 env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC()); \ 291 } 292 293 #define DEF_GVEC_WFC(NAME, SIGNAL) \ 294 DEF_GVEC_WFC_B(NAME, SIGNAL, 64) 295 296 DEF_GVEC_WFC(wfc, false) 297 DEF_GVEC_WFC(wfk, true) 298 299 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status); 300 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 301 CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr) 302 { 303 uint8_t vxc, vec_exc = 0; 304 S390Vector tmp = {}; 305 int match = 0; 306 int i; 307 308 for (i = 0; i < 2; i++) { 309 const float64 a = s390_vec_read_float64(v2, i); 310 const float64 b = s390_vec_read_float64(v3, i); 311 312 /* swap the order of the parameters, so we can use existing functions */ 313 if (fn(b, a, &env->fpu_status)) { 314 match++; 315 s390_vec_write_element64(&tmp, i, -1ull); 316 } 317 vxc = check_ieee_exc(env, i, false, &vec_exc); 318 if (s || vxc) { 319 break; 320 } 321 } 322 323 handle_ieee_exc(env, vxc, vec_exc, retaddr); 324 *v1 = tmp; 325 if (match) { 326 return s || match == 2 ? 0 : 1; 327 } 328 return 3; 329 } 330 331 #define DEF_GVEC_VFC_B(NAME, OP, BITS) \ 332 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 333 CPUS390XState *env, uint32_t desc) \ 334 { \ 335 const bool se = extract32(simd_data(desc), 3, 1); \ 336 vfc##BITS##_fn fn = float##BITS##_##OP##_quiet; \ 337 \ 338 vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ 339 } \ 340 \ 341 void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3, \ 342 CPUS390XState *env, uint32_t desc) \ 343 { \ 344 const bool se = extract32(simd_data(desc), 3, 1); \ 345 vfc##BITS##_fn fn = float##BITS##_##OP##_quiet; \ 346 \ 347 env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ 348 } 349 350 #define DEF_GVEC_VFC(NAME, OP) \ 351 DEF_GVEC_VFC_B(NAME, OP, 64) 352 353 DEF_GVEC_VFC(vfce, eq) 354 DEF_GVEC_VFC(vfch, lt) 355 DEF_GVEC_VFC(vfche, le) 356 357 void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env, 358 uint32_t desc) 359 { 360 const bool s = extract32(simd_data(desc), 3, 1); 361 uint8_t vxc, vec_exc = 0; 362 S390Vector tmp = {}; 363 int i; 364 365 for (i = 0; i < 2; i++) { 366 /* load from even element */ 367 const float32 a = s390_vec_read_element32(v2, i * 2); 368 const uint64_t ret = float32_to_float64(a, &env->fpu_status); 369 370 s390_vec_write_element64(&tmp, i, ret); 371 /* indicate the source element */ 372 vxc = check_ieee_exc(env, i * 2, false, &vec_exc); 373 if (s || vxc) { 374 break; 375 } 376 } 377 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 378 *(S390Vector *)v1 = tmp; 379 } 380 381 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env, 382 uint32_t desc) 383 { 384 const uint8_t erm = extract32(simd_data(desc), 4, 4); 385 const bool s = extract32(simd_data(desc), 3, 1); 386 const bool XxC = extract32(simd_data(desc), 2, 1); 387 uint8_t vxc, vec_exc = 0; 388 S390Vector tmp = {}; 389 int i, old_mode; 390 391 old_mode = s390_swap_bfp_rounding_mode(env, erm); 392 for (i = 0; i < 2; i++) { 393 float64 a = s390_vec_read_element64(v2, i); 394 uint32_t ret = float64_to_float32(a, &env->fpu_status); 395 396 /* place at even element */ 397 s390_vec_write_element32(&tmp, i * 2, ret); 398 /* indicate the source element */ 399 vxc = check_ieee_exc(env, i, XxC, &vec_exc); 400 if (s || vxc) { 401 break; 402 } 403 } 404 s390_restore_bfp_rounding_mode(env, old_mode); 405 handle_ieee_exc(env, vxc, vec_exc, GETPC()); 406 *(S390Vector *)v1 = tmp; 407 } 408 409 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, 410 const S390Vector *v4, CPUS390XState *env, bool s, int flags, 411 uintptr_t retaddr) 412 { 413 uint8_t vxc, vec_exc = 0; 414 S390Vector tmp = {}; 415 int i; 416 417 for (i = 0; i < 2; i++) { 418 const float64 a = s390_vec_read_float64(v2, i); 419 const float64 b = s390_vec_read_float64(v3, i); 420 const float64 c = s390_vec_read_float64(v4, i); 421 const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status); 422 423 s390_vec_write_float64(&tmp, i, ret); 424 vxc = check_ieee_exc(env, i, false, &vec_exc); 425 if (s || vxc) { 426 break; 427 } 428 } 429 handle_ieee_exc(env, vxc, vec_exc, retaddr); 430 *v1 = tmp; 431 } 432 433 #define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS) \ 434 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ 435 const void *v4, CPUS390XState *env, \ 436 uint32_t desc) \ 437 { \ 438 const bool se = extract32(simd_data(desc), 3, 1); \ 439 \ 440 vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC()); \ 441 } 442 443 #define DEF_GVEC_VFMA(NAME, FLAGS) \ 444 DEF_GVEC_VFMA_B(NAME, FLAGS, 64) 445 446 DEF_GVEC_VFMA(vfma, 0) 447 DEF_GVEC_VFMA(vfms, float_muladd_negate_c) 448 449 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env, 450 uint32_t desc) 451 { 452 const uint16_t i3 = extract32(simd_data(desc), 4, 12); 453 const bool s = extract32(simd_data(desc), 3, 1); 454 int i, match = 0; 455 456 for (i = 0; i < 2; i++) { 457 const float64 a = s390_vec_read_float64(v2, i); 458 459 if (float64_dcmask(env, a) & i3) { 460 match++; 461 s390_vec_write_element64(v1, i, -1ull); 462 } else { 463 s390_vec_write_element64(v1, i, 0); 464 } 465 if (s) { 466 break; 467 } 468 } 469 470 if (match == 2 || (s && match)) { 471 env->cc_op = 0; 472 } else if (match) { 473 env->cc_op = 1; 474 } else { 475 env->cc_op = 3; 476 } 477 } 478