1 /* 2 * ARM VFP floating-point operations 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "exec/helper-proto.h" 23 #include "internals.h" 24 #include "cpu-features.h" 25 #include "fpu/softfloat.h" 26 #include "qemu/log.h" 27 28 /* 29 * Set the float_status behaviour to match the Arm defaults: 30 * * tininess-before-rounding 31 * * 2-input NaN propagation prefers SNaN over QNaN, and then 32 * operand A over operand B (see FPProcessNaNs() pseudocode) 33 * * 3-input NaN propagation prefers SNaN over QNaN, and then 34 * operand C over A over B (see FPProcessNaNs3() pseudocode, 35 * but note that for QEMU muladd is a * b + c, whereas for 36 * the pseudocode function the arguments are in the order c, a, b. 37 * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet, 38 * and the input NaN if it is signalling 39 * * Default NaN has sign bit clear, msb frac bit set 40 */ 41 void arm_set_default_fp_behaviours(float_status *s) 42 { 43 set_float_detect_tininess(float_tininess_before_rounding, s); 44 set_float_ftz_detection(float_ftz_before_rounding, s); 45 set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); 46 set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); 47 set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); 48 set_float_default_nan_pattern(0b01000000, s); 49 } 50 51 /* 52 * Set the float_status behaviour to match the FEAT_AFP 53 * FPCR.AH=1 requirements: 54 * * tininess-after-rounding 55 * * 2-input NaN propagation prefers the first NaN 56 * * 3-input NaN propagation prefers a over b over c 57 * * 0 * Inf + NaN always returns the input NaN and doesn't 58 * set Invalid for a QNaN 59 * * default NaN has sign bit set, msb frac bit set 60 */ 61 void arm_set_ah_fp_behaviours(float_status *s) 62 { 63 set_float_detect_tininess(float_tininess_after_rounding, s); 64 set_float_ftz_detection(float_ftz_after_rounding, s); 65 set_float_2nan_prop_rule(float_2nan_prop_ab, s); 66 set_float_3nan_prop_rule(float_3nan_prop_abc, s); 67 set_float_infzeronan_rule(float_infzeronan_dnan_never | 68 float_infzeronan_suppress_invalid, s); 69 set_float_default_nan_pattern(0b11000000, s); 70 } 71 72 /* Convert host exception flags to vfp form. */ 73 static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah) 74 { 75 uint32_t target_bits = 0; 76 77 if (host_bits & float_flag_invalid) { 78 target_bits |= FPSR_IOC; 79 } 80 if (host_bits & float_flag_divbyzero) { 81 target_bits |= FPSR_DZC; 82 } 83 if (host_bits & float_flag_overflow) { 84 target_bits |= FPSR_OFC; 85 } 86 if (host_bits & (float_flag_underflow | float_flag_output_denormal_flushed)) { 87 target_bits |= FPSR_UFC; 88 } 89 if (host_bits & float_flag_inexact) { 90 target_bits |= FPSR_IXC; 91 } 92 if (host_bits & float_flag_input_denormal_flushed) { 93 target_bits |= FPSR_IDC; 94 } 95 /* 96 * With FPCR.AH, IDC is set when an input denormal is used, 97 * and flushing an output denormal to zero sets both IXC and UFC. 98 */ 99 if (ah && (host_bits & float_flag_input_denormal_used)) { 100 target_bits |= FPSR_IDC; 101 } 102 if (ah && (host_bits & float_flag_output_denormal_flushed)) { 103 target_bits |= FPSR_IXC; 104 } 105 return target_bits; 106 } 107 108 uint32_t vfp_get_fpsr_from_host(CPUARMState *env) 109 { 110 uint32_t a32_flags = 0, a64_flags = 0; 111 112 a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]); 113 a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); 114 /* FZ16 does not generate an input denormal exception. */ 115 a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16]) 116 & ~float_flag_input_denormal_flushed); 117 a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) 118 & ~float_flag_input_denormal_flushed); 119 120 a64_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A64]); 121 a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16]) 122 & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); 123 /* 124 * We do not merge in flags from FPST_AH or FPST_AH_F16, because 125 * they are used for insns that must not set the cumulative exception bits. 126 */ 127 128 /* 129 * Flushing an input denormal *only* because FPCR.FIZ == 1 does 130 * not set FPSR.IDC; if FPCR.FZ is also set then this takes 131 * precedence and IDC is set (see the FPUnpackBase pseudocode). 132 * So squash it unless (FPCR.AH == 0 && FPCR.FZ == 1). 133 * We only do this for the a64 flags because FIZ has no effect 134 * on AArch32 even if it is set. 135 */ 136 if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) { 137 a64_flags &= ~float_flag_input_denormal_flushed; 138 } 139 return vfp_exceptbits_from_host(a64_flags, env->vfp.fpcr & FPCR_AH) | 140 vfp_exceptbits_from_host(a32_flags, false); 141 } 142 143 void vfp_clear_float_status_exc_flags(CPUARMState *env) 144 { 145 /* 146 * Clear out all the exception-flag information in the float_status 147 * values. The caller should have arranged for env->vfp.fpsr to 148 * be the architecturally up-to-date exception flag information first. 149 */ 150 set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]); 151 set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]); 152 set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); 153 set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); 154 set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); 155 set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); 156 set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]); 157 set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]); 158 } 159 160 static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) 161 { 162 /* 163 * Synchronize any pending exception-flag information in the 164 * float_status values into env->vfp.fpsr, and then clear out 165 * the float_status data. 166 */ 167 env->vfp.fpsr |= vfp_get_fpsr_from_host(env); 168 vfp_clear_float_status_exc_flags(env); 169 } 170 171 void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) 172 { 173 uint64_t changed = env->vfp.fpcr; 174 175 changed ^= val; 176 changed &= mask; 177 if (changed & (3 << 22)) { 178 int i = (val >> 22) & 3; 179 switch (i) { 180 case FPROUNDING_TIEEVEN: 181 i = float_round_nearest_even; 182 break; 183 case FPROUNDING_POSINF: 184 i = float_round_up; 185 break; 186 case FPROUNDING_NEGINF: 187 i = float_round_down; 188 break; 189 case FPROUNDING_ZERO: 190 i = float_round_to_zero; 191 break; 192 } 193 set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]); 194 set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); 195 set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); 196 set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); 197 } 198 if (changed & FPCR_FZ16) { 199 bool ftz_enabled = val & FPCR_FZ16; 200 set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); 201 set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); 202 set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); 203 set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); 204 set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); 205 set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); 206 set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); 207 set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); 208 } 209 if (changed & FPCR_FZ) { 210 bool ftz_enabled = val & FPCR_FZ; 211 set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); 212 set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); 213 /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ 214 set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); 215 } 216 if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { 217 /* 218 * A64: Flush denormalized inputs to zero if FPCR.FIZ = 1, or 219 * both FPCR.AH = 0 and FPCR.FZ = 1. 220 */ 221 bool fitz_enabled = (val & FPCR_FIZ) || 222 (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; 223 set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]); 224 } 225 if (changed & FPCR_DN) { 226 bool dnan_enabled = val & FPCR_DN; 227 set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]); 228 set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); 229 set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); 230 set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); 231 set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); 232 set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); 233 } 234 if (changed & FPCR_AH) { 235 bool ah_enabled = val & FPCR_AH; 236 237 if (ah_enabled) { 238 /* Change behaviours for A64 FP operations */ 239 arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]); 240 arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); 241 } else { 242 arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); 243 arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); 244 } 245 } 246 /* 247 * If any bits changed that we look at in vfp_get_fpsr_from_host(), 248 * we must sync the float_status flags into vfp.fpsr now (under the 249 * old regime) before we update vfp.fpcr. 250 */ 251 if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { 252 vfp_sync_and_clear_float_status_exc_flags(env); 253 } 254 } 255 256 /* 257 * VFP support. We follow the convention used for VFP instructions: 258 * Single precision routines have a "s" suffix, double precision a 259 * "d" suffix. 260 */ 261 262 #define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p)) 263 264 #define VFP_BINOP(name) \ 265 dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, float_status *fpst) \ 266 { \ 267 return float16_ ## name(a, b, fpst); \ 268 } \ 269 float32 VFP_HELPER(name, s)(float32 a, float32 b, float_status *fpst) \ 270 { \ 271 return float32_ ## name(a, b, fpst); \ 272 } \ 273 float64 VFP_HELPER(name, d)(float64 a, float64 b, float_status *fpst) \ 274 { \ 275 return float64_ ## name(a, b, fpst); \ 276 } 277 VFP_BINOP(add) 278 VFP_BINOP(sub) 279 VFP_BINOP(mul) 280 VFP_BINOP(div) 281 VFP_BINOP(min) 282 VFP_BINOP(max) 283 VFP_BINOP(minnum) 284 VFP_BINOP(maxnum) 285 #undef VFP_BINOP 286 287 dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, float_status *fpst) 288 { 289 return float16_sqrt(a, fpst); 290 } 291 292 float32 VFP_HELPER(sqrt, s)(float32 a, float_status *fpst) 293 { 294 return float32_sqrt(a, fpst); 295 } 296 297 float64 VFP_HELPER(sqrt, d)(float64 a, float_status *fpst) 298 { 299 return float64_sqrt(a, fpst); 300 } 301 302 static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp) 303 { 304 uint32_t flags; 305 switch (cmp) { 306 case float_relation_equal: 307 flags = 0x6; 308 break; 309 case float_relation_less: 310 flags = 0x8; 311 break; 312 case float_relation_greater: 313 flags = 0x2; 314 break; 315 case float_relation_unordered: 316 flags = 0x3; 317 break; 318 default: 319 g_assert_not_reached(); 320 } 321 env->vfp.fpsr = deposit64(env->vfp.fpsr, 28, 4, flags); /* NZCV */ 322 } 323 324 /* XXX: check quiet/signaling case */ 325 #define DO_VFP_cmp(P, FLOATTYPE, ARGTYPE, FPST) \ 326 void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ 327 { \ 328 softfloat_to_vfp_compare(env, \ 329 FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \ 330 } \ 331 void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ 332 { \ 333 softfloat_to_vfp_compare(env, \ 334 FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \ 335 } 336 DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16) 337 DO_VFP_cmp(s, float32, float32, FPST_A32) 338 DO_VFP_cmp(d, float64, float64, FPST_A32) 339 #undef DO_VFP_cmp 340 341 /* Integer to float and float to integer conversions */ 342 343 #define CONV_ITOF(name, ftype, fsz, sign) \ 344 ftype HELPER(name)(uint32_t x, float_status *fpst) \ 345 { \ 346 return sign##int32_to_##float##fsz((sign##int32_t)x, fpst); \ 347 } 348 349 #define CONV_FTOI(name, ftype, fsz, sign, round) \ 350 sign##int32_t HELPER(name)(ftype x, float_status *fpst) \ 351 { \ 352 if (float##fsz##_is_any_nan(x)) { \ 353 float_raise(float_flag_invalid, fpst); \ 354 return 0; \ 355 } \ 356 return float##fsz##_to_##sign##int32##round(x, fpst); \ 357 } 358 359 #define FLOAT_CONVS(name, p, ftype, fsz, sign) \ 360 CONV_ITOF(vfp_##name##to##p, ftype, fsz, sign) \ 361 CONV_FTOI(vfp_to##name##p, ftype, fsz, sign, ) \ 362 CONV_FTOI(vfp_to##name##z##p, ftype, fsz, sign, _round_to_zero) 363 364 FLOAT_CONVS(si, h, uint32_t, 16, ) 365 FLOAT_CONVS(si, s, float32, 32, ) 366 FLOAT_CONVS(si, d, float64, 64, ) 367 FLOAT_CONVS(ui, h, uint32_t, 16, u) 368 FLOAT_CONVS(ui, s, float32, 32, u) 369 FLOAT_CONVS(ui, d, float64, 64, u) 370 371 #undef CONV_ITOF 372 #undef CONV_FTOI 373 #undef FLOAT_CONVS 374 375 /* floating point conversion */ 376 float64 VFP_HELPER(fcvtd, s)(float32 x, float_status *status) 377 { 378 return float32_to_float64(x, status); 379 } 380 381 float32 VFP_HELPER(fcvts, d)(float64 x, float_status *status) 382 { 383 return float64_to_float32(x, status); 384 } 385 386 uint32_t HELPER(bfcvt)(float32 x, float_status *status) 387 { 388 return float32_to_bfloat16(x, status); 389 } 390 391 uint32_t HELPER(bfcvt_pair)(uint64_t pair, float_status *status) 392 { 393 bfloat16 lo = float32_to_bfloat16(extract64(pair, 0, 32), status); 394 bfloat16 hi = float32_to_bfloat16(extract64(pair, 32, 32), status); 395 return deposit32(lo, 16, 16, hi); 396 } 397 398 /* 399 * VFP3 fixed point conversion. The AArch32 versions of fix-to-float 400 * must always round-to-nearest; the AArch64 ones honour the FPSCR 401 * rounding mode. (For AArch32 Neon the standard-FPSCR is set to 402 * round-to-nearest so either helper will work.) AArch32 float-to-fix 403 * must round-to-zero. 404 */ 405 #define VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \ 406 ftype HELPER(vfp_##name##to##p)(uint##isz##_t x, uint32_t shift, \ 407 float_status *fpst) \ 408 { return itype##_to_##float##fsz##_scalbn(x, -shift, fpst); } 409 410 #define VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype) \ 411 ftype HELPER(vfp_##name##to##p##_round_to_nearest)(uint##isz##_t x, \ 412 uint32_t shift, \ 413 float_status *fpst) \ 414 { \ 415 ftype ret; \ 416 FloatRoundMode oldmode = fpst->float_rounding_mode; \ 417 fpst->float_rounding_mode = float_round_nearest_even; \ 418 ret = itype##_to_##float##fsz##_scalbn(x, -shift, fpst); \ 419 fpst->float_rounding_mode = oldmode; \ 420 return ret; \ 421 } 422 423 #define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, ROUND, suff) \ 424 uint##isz##_t HELPER(vfp_to##name##p##suff)(ftype x, uint32_t shift, \ 425 float_status *fpst) \ 426 { \ 427 if (unlikely(float##fsz##_is_any_nan(x))) { \ 428 float_raise(float_flag_invalid, fpst); \ 429 return 0; \ 430 } \ 431 return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst); \ 432 } 433 434 #define VFP_CONV_FIX(name, p, fsz, ftype, isz, itype) \ 435 VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \ 436 VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype) \ 437 VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \ 438 float_round_to_zero, _round_to_zero) \ 439 VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \ 440 get_float_rounding_mode(fpst), ) 441 442 #define VFP_CONV_FIX_A64(name, p, fsz, ftype, isz, itype) \ 443 VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \ 444 VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \ 445 get_float_rounding_mode(fpst), ) 446 447 VFP_CONV_FIX(sh, d, 64, float64, 64, int16) 448 VFP_CONV_FIX(sl, d, 64, float64, 64, int32) 449 VFP_CONV_FIX_A64(sq, d, 64, float64, 64, int64) 450 VFP_CONV_FIX(uh, d, 64, float64, 64, uint16) 451 VFP_CONV_FIX(ul, d, 64, float64, 64, uint32) 452 VFP_CONV_FIX_A64(uq, d, 64, float64, 64, uint64) 453 VFP_CONV_FIX(sh, s, 32, float32, 32, int16) 454 VFP_CONV_FIX(sl, s, 32, float32, 32, int32) 455 VFP_CONV_FIX_A64(sq, s, 32, float32, 64, int64) 456 VFP_CONV_FIX(uh, s, 32, float32, 32, uint16) 457 VFP_CONV_FIX(ul, s, 32, float32, 32, uint32) 458 VFP_CONV_FIX_A64(uq, s, 32, float32, 64, uint64) 459 VFP_CONV_FIX(sh, h, 16, dh_ctype_f16, 32, int16) 460 VFP_CONV_FIX(sl, h, 16, dh_ctype_f16, 32, int32) 461 VFP_CONV_FIX_A64(sq, h, 16, dh_ctype_f16, 64, int64) 462 VFP_CONV_FIX(uh, h, 16, dh_ctype_f16, 32, uint16) 463 VFP_CONV_FIX(ul, h, 16, dh_ctype_f16, 32, uint32) 464 VFP_CONV_FIX_A64(uq, h, 16, dh_ctype_f16, 64, uint64) 465 VFP_CONV_FLOAT_FIX_ROUND(sq, d, 64, float64, 64, int64, 466 float_round_to_zero, _round_to_zero) 467 VFP_CONV_FLOAT_FIX_ROUND(uq, d, 64, float64, 64, uint64, 468 float_round_to_zero, _round_to_zero) 469 470 #undef VFP_CONV_FIX 471 #undef VFP_CONV_FIX_FLOAT 472 #undef VFP_CONV_FLOAT_FIX_ROUND 473 #undef VFP_CONV_FIX_A64 474 475 /* Set the current fp rounding mode and return the old one. 476 * The argument is a softfloat float_round_ value. 477 */ 478 uint32_t HELPER(set_rmode)(uint32_t rmode, float_status *fp_status) 479 { 480 uint32_t prev_rmode = get_float_rounding_mode(fp_status); 481 set_float_rounding_mode(rmode, fp_status); 482 483 return prev_rmode; 484 } 485 486 /* Half precision conversions. */ 487 float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, float_status *fpst, 488 uint32_t ahp_mode) 489 { 490 /* Squash FZ16 to 0 for the duration of conversion. In this case, 491 * it would affect flushing input denormals. 492 */ 493 bool save = get_flush_inputs_to_zero(fpst); 494 set_flush_inputs_to_zero(false, fpst); 495 float32 r = float16_to_float32(a, !ahp_mode, fpst); 496 set_flush_inputs_to_zero(save, fpst); 497 return r; 498 } 499 500 uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, float_status *fpst, 501 uint32_t ahp_mode) 502 { 503 /* Squash FZ16 to 0 for the duration of conversion. In this case, 504 * it would affect flushing output denormals. 505 */ 506 bool save = get_flush_to_zero(fpst); 507 set_flush_to_zero(false, fpst); 508 float16 r = float32_to_float16(a, !ahp_mode, fpst); 509 set_flush_to_zero(save, fpst); 510 return r; 511 } 512 513 float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, float_status *fpst, 514 uint32_t ahp_mode) 515 { 516 /* Squash FZ16 to 0 for the duration of conversion. In this case, 517 * it would affect flushing input denormals. 518 */ 519 bool save = get_flush_inputs_to_zero(fpst); 520 set_flush_inputs_to_zero(false, fpst); 521 float64 r = float16_to_float64(a, !ahp_mode, fpst); 522 set_flush_inputs_to_zero(save, fpst); 523 return r; 524 } 525 526 uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, float_status *fpst, 527 uint32_t ahp_mode) 528 { 529 /* Squash FZ16 to 0 for the duration of conversion. In this case, 530 * it would affect flushing output denormals. 531 */ 532 bool save = get_flush_to_zero(fpst); 533 set_flush_to_zero(false, fpst); 534 float16 r = float64_to_float16(a, !ahp_mode, fpst); 535 set_flush_to_zero(save, fpst); 536 return r; 537 } 538 539 /* NEON helpers. */ 540 541 /* Constants 256 and 512 are used in some helpers; we avoid relying on 542 * int->float conversions at run-time. */ 543 #define float64_256 make_float64(0x4070000000000000LL) 544 #define float64_512 make_float64(0x4080000000000000LL) 545 #define float16_maxnorm make_float16(0x7bff) 546 #define float32_maxnorm make_float32(0x7f7fffff) 547 #define float64_maxnorm make_float64(0x7fefffffffffffffLL) 548 549 /* Reciprocal functions 550 * 551 * The algorithm that must be used to calculate the estimate 552 * is specified by the ARM ARM, see FPRecipEstimate()/RecipEstimate 553 */ 554 555 /* See RecipEstimate() 556 * 557 * input is a 9 bit fixed point number 558 * input range 256 .. 511 for a number from 0.5 <= x < 1.0. 559 * result range 256 .. 511 for a number from 1.0 to 511/256. 560 */ 561 562 static int recip_estimate(int input) 563 { 564 int a, b, r; 565 assert(256 <= input && input < 512); 566 a = (input * 2) + 1; 567 b = (1 << 19) / a; 568 r = (b + 1) >> 1; 569 assert(256 <= r && r < 512); 570 return r; 571 } 572 573 /* 574 * Increased precision version: 575 * input is a 13 bit fixed point number 576 * input range 2048 .. 4095 for a number from 0.5 <= x < 1.0. 577 * result range 4096 .. 8191 for a number from 1.0 to 2.0 578 */ 579 static int recip_estimate_incprec(int input) 580 { 581 int a, b, r; 582 assert(2048 <= input && input < 4096); 583 a = (input * 2) + 1; 584 /* 585 * The pseudocode expresses this as an operation on infinite 586 * precision reals where it calculates 2^25 / a and then looks 587 * at the error between that and the rounded-down-to-integer 588 * value to see if it should instead round up. We instead 589 * follow the same approach as the pseudocode for the 8-bit 590 * precision version, and calculate (2 * (2^25 / a)) as an 591 * integer so we can do the "add one and halve" to round it. 592 * So the 1 << 26 here is correct. 593 */ 594 b = (1 << 26) / a; 595 r = (b + 1) >> 1; 596 assert(4096 <= r && r < 8192); 597 return r; 598 } 599 600 /* 601 * Common wrapper to call recip_estimate 602 * 603 * The parameters are exponent and 64 bit fraction (without implicit 604 * bit) where the binary point is nominally at bit 52. Returns a 605 * float64 which can then be rounded to the appropriate size by the 606 * callee. 607 */ 608 609 static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac, 610 bool increasedprecision) 611 { 612 uint32_t scaled, estimate; 613 uint64_t result_frac; 614 int result_exp; 615 616 /* Handle sub-normals */ 617 if (*exp == 0) { 618 if (extract64(frac, 51, 1) == 0) { 619 *exp = -1; 620 frac <<= 2; 621 } else { 622 frac <<= 1; 623 } 624 } 625 626 if (increasedprecision) { 627 /* scaled = UInt('1':fraction<51:41>) */ 628 scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11)); 629 estimate = recip_estimate_incprec(scaled); 630 } else { 631 /* scaled = UInt('1':fraction<51:44>) */ 632 scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); 633 estimate = recip_estimate(scaled); 634 } 635 636 result_exp = exp_off - *exp; 637 if (increasedprecision) { 638 result_frac = deposit64(0, 40, 12, estimate); 639 } else { 640 result_frac = deposit64(0, 44, 8, estimate); 641 } 642 if (result_exp == 0) { 643 result_frac = deposit64(result_frac >> 1, 51, 1, 1); 644 } else if (result_exp == -1) { 645 result_frac = deposit64(result_frac >> 2, 50, 2, 1); 646 result_exp = 0; 647 } 648 649 *exp = result_exp; 650 651 return result_frac; 652 } 653 654 static bool round_to_inf(float_status *fpst, bool sign_bit) 655 { 656 switch (fpst->float_rounding_mode) { 657 case float_round_nearest_even: /* Round to Nearest */ 658 return true; 659 case float_round_up: /* Round to +Inf */ 660 return !sign_bit; 661 case float_round_down: /* Round to -Inf */ 662 return sign_bit; 663 case float_round_to_zero: /* Round to Zero */ 664 return false; 665 default: 666 g_assert_not_reached(); 667 } 668 } 669 670 uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst) 671 { 672 float16 f16 = float16_squash_input_denormal(input, fpst); 673 uint32_t f16_val = float16_val(f16); 674 uint32_t f16_sign = float16_is_neg(f16); 675 int f16_exp = extract32(f16_val, 10, 5); 676 uint32_t f16_frac = extract32(f16_val, 0, 10); 677 uint64_t f64_frac; 678 679 if (float16_is_any_nan(f16)) { 680 float16 nan = f16; 681 if (float16_is_signaling_nan(f16, fpst)) { 682 float_raise(float_flag_invalid, fpst); 683 if (!fpst->default_nan_mode) { 684 nan = float16_silence_nan(f16, fpst); 685 } 686 } 687 if (fpst->default_nan_mode) { 688 nan = float16_default_nan(fpst); 689 } 690 return nan; 691 } else if (float16_is_infinity(f16)) { 692 return float16_set_sign(float16_zero, float16_is_neg(f16)); 693 } else if (float16_is_zero(f16)) { 694 float_raise(float_flag_divbyzero, fpst); 695 return float16_set_sign(float16_infinity, float16_is_neg(f16)); 696 } else if (float16_abs(f16) < (1 << 8)) { 697 /* Abs(value) < 2.0^-16 */ 698 float_raise(float_flag_overflow | float_flag_inexact, fpst); 699 if (round_to_inf(fpst, f16_sign)) { 700 return float16_set_sign(float16_infinity, f16_sign); 701 } else { 702 return float16_set_sign(float16_maxnorm, f16_sign); 703 } 704 } else if (f16_exp >= 29 && fpst->flush_to_zero) { 705 float_raise(float_flag_underflow, fpst); 706 return float16_set_sign(float16_zero, float16_is_neg(f16)); 707 } 708 709 f64_frac = call_recip_estimate(&f16_exp, 29, 710 ((uint64_t) f16_frac) << (52 - 10), false); 711 712 /* result = sign : result_exp<4:0> : fraction<51:42> */ 713 f16_val = deposit32(0, 15, 1, f16_sign); 714 f16_val = deposit32(f16_val, 10, 5, f16_exp); 715 f16_val = deposit32(f16_val, 0, 10, extract64(f64_frac, 52 - 10, 10)); 716 return make_float16(f16_val); 717 } 718 719 /* 720 * FEAT_RPRES means the f32 FRECPE has an "increased precision" variant 721 * which is used when FPCR.AH == 1. 722 */ 723 static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres) 724 { 725 float32 f32 = float32_squash_input_denormal(input, fpst); 726 uint32_t f32_val = float32_val(f32); 727 bool f32_sign = float32_is_neg(f32); 728 int f32_exp = extract32(f32_val, 23, 8); 729 uint32_t f32_frac = extract32(f32_val, 0, 23); 730 uint64_t f64_frac; 731 732 if (float32_is_any_nan(f32)) { 733 float32 nan = f32; 734 if (float32_is_signaling_nan(f32, fpst)) { 735 float_raise(float_flag_invalid, fpst); 736 if (!fpst->default_nan_mode) { 737 nan = float32_silence_nan(f32, fpst); 738 } 739 } 740 if (fpst->default_nan_mode) { 741 nan = float32_default_nan(fpst); 742 } 743 return nan; 744 } else if (float32_is_infinity(f32)) { 745 return float32_set_sign(float32_zero, float32_is_neg(f32)); 746 } else if (float32_is_zero(f32)) { 747 float_raise(float_flag_divbyzero, fpst); 748 return float32_set_sign(float32_infinity, float32_is_neg(f32)); 749 } else if (float32_abs(f32) < (1ULL << 21)) { 750 /* Abs(value) < 2.0^-128 */ 751 float_raise(float_flag_overflow | float_flag_inexact, fpst); 752 if (round_to_inf(fpst, f32_sign)) { 753 return float32_set_sign(float32_infinity, f32_sign); 754 } else { 755 return float32_set_sign(float32_maxnorm, f32_sign); 756 } 757 } else if (f32_exp >= 253 && fpst->flush_to_zero) { 758 float_raise(float_flag_underflow, fpst); 759 return float32_set_sign(float32_zero, float32_is_neg(f32)); 760 } 761 762 f64_frac = call_recip_estimate(&f32_exp, 253, 763 ((uint64_t) f32_frac) << (52 - 23), rpres); 764 765 /* result = sign : result_exp<7:0> : fraction<51:29> */ 766 f32_val = deposit32(0, 31, 1, f32_sign); 767 f32_val = deposit32(f32_val, 23, 8, f32_exp); 768 f32_val = deposit32(f32_val, 0, 23, extract64(f64_frac, 52 - 23, 23)); 769 return make_float32(f32_val); 770 } 771 772 float32 HELPER(recpe_f32)(float32 input, float_status *fpst) 773 { 774 return do_recpe_f32(input, fpst, false); 775 } 776 777 float32 HELPER(recpe_rpres_f32)(float32 input, float_status *fpst) 778 { 779 return do_recpe_f32(input, fpst, true); 780 } 781 782 float64 HELPER(recpe_f64)(float64 input, float_status *fpst) 783 { 784 float64 f64 = float64_squash_input_denormal(input, fpst); 785 uint64_t f64_val = float64_val(f64); 786 bool f64_sign = float64_is_neg(f64); 787 int f64_exp = extract64(f64_val, 52, 11); 788 uint64_t f64_frac = extract64(f64_val, 0, 52); 789 790 /* Deal with any special cases */ 791 if (float64_is_any_nan(f64)) { 792 float64 nan = f64; 793 if (float64_is_signaling_nan(f64, fpst)) { 794 float_raise(float_flag_invalid, fpst); 795 if (!fpst->default_nan_mode) { 796 nan = float64_silence_nan(f64, fpst); 797 } 798 } 799 if (fpst->default_nan_mode) { 800 nan = float64_default_nan(fpst); 801 } 802 return nan; 803 } else if (float64_is_infinity(f64)) { 804 return float64_set_sign(float64_zero, float64_is_neg(f64)); 805 } else if (float64_is_zero(f64)) { 806 float_raise(float_flag_divbyzero, fpst); 807 return float64_set_sign(float64_infinity, float64_is_neg(f64)); 808 } else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) { 809 /* Abs(value) < 2.0^-1024 */ 810 float_raise(float_flag_overflow | float_flag_inexact, fpst); 811 if (round_to_inf(fpst, f64_sign)) { 812 return float64_set_sign(float64_infinity, f64_sign); 813 } else { 814 return float64_set_sign(float64_maxnorm, f64_sign); 815 } 816 } else if (f64_exp >= 2045 && fpst->flush_to_zero) { 817 float_raise(float_flag_underflow, fpst); 818 return float64_set_sign(float64_zero, float64_is_neg(f64)); 819 } 820 821 f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac, false); 822 823 /* result = sign : result_exp<10:0> : fraction<51:0>; */ 824 f64_val = deposit64(0, 63, 1, f64_sign); 825 f64_val = deposit64(f64_val, 52, 11, f64_exp); 826 f64_val = deposit64(f64_val, 0, 52, f64_frac); 827 return make_float64(f64_val); 828 } 829 830 /* The algorithm that must be used to calculate the estimate 831 * is specified by the ARM ARM. 832 */ 833 834 static int do_recip_sqrt_estimate(int a) 835 { 836 int b, estimate; 837 838 assert(128 <= a && a < 512); 839 if (a < 256) { 840 a = a * 2 + 1; 841 } else { 842 a = (a >> 1) << 1; 843 a = (a + 1) * 2; 844 } 845 b = 512; 846 while (a * (b + 1) * (b + 1) < (1 << 28)) { 847 b += 1; 848 } 849 estimate = (b + 1) / 2; 850 assert(256 <= estimate && estimate < 512); 851 852 return estimate; 853 } 854 855 static int do_recip_sqrt_estimate_incprec(int a) 856 { 857 /* 858 * The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate 859 * in terms of an infinite-precision floating point calculation of a 860 * square root. We implement this using the same kind of pure integer 861 * algorithm as the 8-bit mantissa, to get the same bit-for-bit result. 862 */ 863 int64_t b, estimate; 864 865 assert(1024 <= a && a < 4096); 866 if (a < 2048) { 867 a = a * 2 + 1; 868 } else { 869 a = (a >> 1) << 1; 870 a = (a + 1) * 2; 871 } 872 b = 8192; 873 while (a * (b + 1) * (b + 1) < (1ULL << 39)) { 874 b += 1; 875 } 876 estimate = (b + 1) / 2; 877 878 assert(4096 <= estimate && estimate < 8192); 879 880 return estimate; 881 } 882 883 static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac, 884 bool increasedprecision) 885 { 886 int estimate; 887 uint32_t scaled; 888 889 if (*exp == 0) { 890 while (extract64(frac, 51, 1) == 0) { 891 frac = frac << 1; 892 *exp -= 1; 893 } 894 frac = extract64(frac, 0, 51) << 1; 895 } 896 897 if (increasedprecision) { 898 if (*exp & 1) { 899 /* scaled = UInt('01':fraction<51:42>) */ 900 scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10)); 901 } else { 902 /* scaled = UInt('1':fraction<51:41>) */ 903 scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11)); 904 } 905 estimate = do_recip_sqrt_estimate_incprec(scaled); 906 } else { 907 if (*exp & 1) { 908 /* scaled = UInt('01':fraction<51:45>) */ 909 scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7)); 910 } else { 911 /* scaled = UInt('1':fraction<51:44>) */ 912 scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); 913 } 914 estimate = do_recip_sqrt_estimate(scaled); 915 } 916 917 *exp = (exp_off - *exp) / 2; 918 if (increasedprecision) { 919 return extract64(estimate, 0, 12) << 40; 920 } else { 921 return extract64(estimate, 0, 8) << 44; 922 } 923 } 924 925 uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) 926 { 927 float16 f16 = float16_squash_input_denormal(input, s); 928 uint16_t val = float16_val(f16); 929 bool f16_sign = float16_is_neg(f16); 930 int f16_exp = extract32(val, 10, 5); 931 uint16_t f16_frac = extract32(val, 0, 10); 932 uint64_t f64_frac; 933 934 if (float16_is_any_nan(f16)) { 935 float16 nan = f16; 936 if (float16_is_signaling_nan(f16, s)) { 937 float_raise(float_flag_invalid, s); 938 if (!s->default_nan_mode) { 939 nan = float16_silence_nan(f16, s); 940 } 941 } 942 if (s->default_nan_mode) { 943 nan = float16_default_nan(s); 944 } 945 return nan; 946 } else if (float16_is_zero(f16)) { 947 float_raise(float_flag_divbyzero, s); 948 return float16_set_sign(float16_infinity, f16_sign); 949 } else if (f16_sign) { 950 float_raise(float_flag_invalid, s); 951 return float16_default_nan(s); 952 } else if (float16_is_infinity(f16)) { 953 return float16_zero; 954 } 955 956 /* Scale and normalize to a double-precision value between 0.25 and 1.0, 957 * preserving the parity of the exponent. */ 958 959 f64_frac = ((uint64_t) f16_frac) << (52 - 10); 960 961 f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false); 962 963 /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */ 964 val = deposit32(0, 15, 1, f16_sign); 965 val = deposit32(val, 10, 5, f16_exp); 966 val = deposit32(val, 2, 8, extract64(f64_frac, 52 - 8, 8)); 967 return make_float16(val); 968 } 969 970 /* 971 * FEAT_RPRES means the f32 FRSQRTE has an "increased precision" variant 972 * which is used when FPCR.AH == 1. 973 */ 974 static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres) 975 { 976 float32 f32 = float32_squash_input_denormal(input, s); 977 uint32_t val = float32_val(f32); 978 uint32_t f32_sign = float32_is_neg(f32); 979 int f32_exp = extract32(val, 23, 8); 980 uint32_t f32_frac = extract32(val, 0, 23); 981 uint64_t f64_frac; 982 983 if (float32_is_any_nan(f32)) { 984 float32 nan = f32; 985 if (float32_is_signaling_nan(f32, s)) { 986 float_raise(float_flag_invalid, s); 987 if (!s->default_nan_mode) { 988 nan = float32_silence_nan(f32, s); 989 } 990 } 991 if (s->default_nan_mode) { 992 nan = float32_default_nan(s); 993 } 994 return nan; 995 } else if (float32_is_zero(f32)) { 996 float_raise(float_flag_divbyzero, s); 997 return float32_set_sign(float32_infinity, float32_is_neg(f32)); 998 } else if (float32_is_neg(f32)) { 999 float_raise(float_flag_invalid, s); 1000 return float32_default_nan(s); 1001 } else if (float32_is_infinity(f32)) { 1002 return float32_zero; 1003 } 1004 1005 /* Scale and normalize to a double-precision value between 0.25 and 1.0, 1006 * preserving the parity of the exponent. */ 1007 1008 f64_frac = ((uint64_t) f32_frac) << 29; 1009 1010 f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres); 1011 1012 /* 1013 * result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15) 1014 * or for increased precision 1015 * result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11) 1016 */ 1017 val = deposit32(0, 31, 1, f32_sign); 1018 val = deposit32(val, 23, 8, f32_exp); 1019 if (rpres) { 1020 val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12)); 1021 } else { 1022 val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8)); 1023 } 1024 return make_float32(val); 1025 } 1026 1027 float32 HELPER(rsqrte_f32)(float32 input, float_status *s) 1028 { 1029 return do_rsqrte_f32(input, s, false); 1030 } 1031 1032 float32 HELPER(rsqrte_rpres_f32)(float32 input, float_status *s) 1033 { 1034 return do_rsqrte_f32(input, s, true); 1035 } 1036 1037 float64 HELPER(rsqrte_f64)(float64 input, float_status *s) 1038 { 1039 float64 f64 = float64_squash_input_denormal(input, s); 1040 uint64_t val = float64_val(f64); 1041 bool f64_sign = float64_is_neg(f64); 1042 int f64_exp = extract64(val, 52, 11); 1043 uint64_t f64_frac = extract64(val, 0, 52); 1044 1045 if (float64_is_any_nan(f64)) { 1046 float64 nan = f64; 1047 if (float64_is_signaling_nan(f64, s)) { 1048 float_raise(float_flag_invalid, s); 1049 if (!s->default_nan_mode) { 1050 nan = float64_silence_nan(f64, s); 1051 } 1052 } 1053 if (s->default_nan_mode) { 1054 nan = float64_default_nan(s); 1055 } 1056 return nan; 1057 } else if (float64_is_zero(f64)) { 1058 float_raise(float_flag_divbyzero, s); 1059 return float64_set_sign(float64_infinity, float64_is_neg(f64)); 1060 } else if (float64_is_neg(f64)) { 1061 float_raise(float_flag_invalid, s); 1062 return float64_default_nan(s); 1063 } else if (float64_is_infinity(f64)) { 1064 return float64_zero; 1065 } 1066 1067 f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false); 1068 1069 /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */ 1070 val = deposit64(0, 61, 1, f64_sign); 1071 val = deposit64(val, 52, 11, f64_exp); 1072 val = deposit64(val, 44, 8, extract64(f64_frac, 52 - 8, 8)); 1073 return make_float64(val); 1074 } 1075 1076 uint32_t HELPER(recpe_u32)(uint32_t a) 1077 { 1078 int input, estimate; 1079 1080 if ((a & 0x80000000) == 0) { 1081 return 0xffffffff; 1082 } 1083 1084 input = extract32(a, 23, 9); 1085 estimate = recip_estimate(input); 1086 1087 return deposit32(0, (32 - 9), 9, estimate); 1088 } 1089 1090 uint32_t HELPER(rsqrte_u32)(uint32_t a) 1091 { 1092 int estimate; 1093 1094 if ((a & 0xc0000000) == 0) { 1095 return 0xffffffff; 1096 } 1097 1098 estimate = do_recip_sqrt_estimate(extract32(a, 23, 9)); 1099 1100 return deposit32(0, 23, 9, estimate); 1101 } 1102 1103 /* VFPv4 fused multiply-accumulate */ 1104 dh_ctype_f16 VFP_HELPER(muladd, h)(dh_ctype_f16 a, dh_ctype_f16 b, 1105 dh_ctype_f16 c, float_status *fpst) 1106 { 1107 return float16_muladd(a, b, c, 0, fpst); 1108 } 1109 1110 float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, 1111 float_status *fpst) 1112 { 1113 return float32_muladd(a, b, c, 0, fpst); 1114 } 1115 1116 float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, 1117 float_status *fpst) 1118 { 1119 return float64_muladd(a, b, c, 0, fpst); 1120 } 1121 1122 /* ARMv8 round to integral */ 1123 dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, float_status *fp_status) 1124 { 1125 return float16_round_to_int(x, fp_status); 1126 } 1127 1128 float32 HELPER(rints_exact)(float32 x, float_status *fp_status) 1129 { 1130 return float32_round_to_int(x, fp_status); 1131 } 1132 1133 float64 HELPER(rintd_exact)(float64 x, float_status *fp_status) 1134 { 1135 return float64_round_to_int(x, fp_status); 1136 } 1137 1138 dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, float_status *fp_status) 1139 { 1140 int old_flags = get_float_exception_flags(fp_status), new_flags; 1141 float16 ret; 1142 1143 ret = float16_round_to_int(x, fp_status); 1144 1145 /* Suppress any inexact exceptions the conversion produced */ 1146 if (!(old_flags & float_flag_inexact)) { 1147 new_flags = get_float_exception_flags(fp_status); 1148 set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status); 1149 } 1150 1151 return ret; 1152 } 1153 1154 float32 HELPER(rints)(float32 x, float_status *fp_status) 1155 { 1156 int old_flags = get_float_exception_flags(fp_status), new_flags; 1157 float32 ret; 1158 1159 ret = float32_round_to_int(x, fp_status); 1160 1161 /* Suppress any inexact exceptions the conversion produced */ 1162 if (!(old_flags & float_flag_inexact)) { 1163 new_flags = get_float_exception_flags(fp_status); 1164 set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status); 1165 } 1166 1167 return ret; 1168 } 1169 1170 float64 HELPER(rintd)(float64 x, float_status *fp_status) 1171 { 1172 int old_flags = get_float_exception_flags(fp_status), new_flags; 1173 float64 ret; 1174 1175 ret = float64_round_to_int(x, fp_status); 1176 1177 /* Suppress any inexact exceptions the conversion produced */ 1178 if (!(old_flags & float_flag_inexact)) { 1179 new_flags = get_float_exception_flags(fp_status); 1180 set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status); 1181 } 1182 1183 return ret; 1184 } 1185 1186 /* Convert ARM rounding mode to softfloat */ 1187 const FloatRoundMode arm_rmode_to_sf_map[] = { 1188 [FPROUNDING_TIEEVEN] = float_round_nearest_even, 1189 [FPROUNDING_POSINF] = float_round_up, 1190 [FPROUNDING_NEGINF] = float_round_down, 1191 [FPROUNDING_ZERO] = float_round_to_zero, 1192 [FPROUNDING_TIEAWAY] = float_round_ties_away, 1193 [FPROUNDING_ODD] = float_round_to_odd, 1194 }; 1195 1196 /* 1197 * Implement float64 to int32_t conversion without saturation; 1198 * the result is supplied modulo 2^32. 1199 */ 1200 uint64_t HELPER(fjcvtzs)(float64 value, float_status *status) 1201 { 1202 uint32_t frac, e_old, e_new; 1203 bool inexact; 1204 1205 e_old = get_float_exception_flags(status); 1206 set_float_exception_flags(0, status); 1207 frac = float64_to_int32_modulo(value, float_round_to_zero, status); 1208 e_new = get_float_exception_flags(status); 1209 set_float_exception_flags(e_old | e_new, status); 1210 1211 /* Normal inexact, denormal with flush-to-zero, or overflow or NaN */ 1212 inexact = e_new & (float_flag_inexact | 1213 float_flag_input_denormal_flushed | 1214 float_flag_invalid); 1215 1216 /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */ 1217 inexact |= value == float64_chs(float64_zero); 1218 1219 /* Pack the result and the env->ZF representation of Z together. */ 1220 return deposit64(frac, 32, 32, inexact); 1221 } 1222 1223 uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env) 1224 { 1225 uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]); 1226 uint32_t result = pair; 1227 uint32_t z = (pair >> 32) == 0; 1228 1229 /* Store Z, clear NCV, in FPSCR.NZCV. */ 1230 env->vfp.fpsr = (env->vfp.fpsr & ~FPSR_NZCV_MASK) | (z * FPSR_Z); 1231 1232 return result; 1233 } 1234 1235 /* Round a float32 to an integer that fits in int32_t or int64_t. */ 1236 static float32 frint_s(float32 f, float_status *fpst, int intsize) 1237 { 1238 int old_flags = get_float_exception_flags(fpst); 1239 uint32_t exp = extract32(f, 23, 8); 1240 1241 if (unlikely(exp == 0xff)) { 1242 /* NaN or Inf. */ 1243 goto overflow; 1244 } 1245 1246 /* Round and re-extract the exponent. */ 1247 f = float32_round_to_int(f, fpst); 1248 exp = extract32(f, 23, 8); 1249 1250 /* Validate the range of the result. */ 1251 if (exp < 126 + intsize) { 1252 /* abs(F) <= INT{N}_MAX */ 1253 return f; 1254 } 1255 if (exp == 126 + intsize) { 1256 uint32_t sign = extract32(f, 31, 1); 1257 uint32_t frac = extract32(f, 0, 23); 1258 if (sign && frac == 0) { 1259 /* F == INT{N}_MIN */ 1260 return f; 1261 } 1262 } 1263 1264 overflow: 1265 /* 1266 * Raise Invalid and return INT{N}_MIN as a float. Revert any 1267 * inexact exception float32_round_to_int may have raised. 1268 */ 1269 set_float_exception_flags(old_flags | float_flag_invalid, fpst); 1270 return (0x100u + 126u + intsize) << 23; 1271 } 1272 1273 float32 HELPER(frint32_s)(float32 f, float_status *fpst) 1274 { 1275 return frint_s(f, fpst, 32); 1276 } 1277 1278 float32 HELPER(frint64_s)(float32 f, float_status *fpst) 1279 { 1280 return frint_s(f, fpst, 64); 1281 } 1282 1283 /* Round a float64 to an integer that fits in int32_t or int64_t. */ 1284 static float64 frint_d(float64 f, float_status *fpst, int intsize) 1285 { 1286 int old_flags = get_float_exception_flags(fpst); 1287 uint32_t exp = extract64(f, 52, 11); 1288 1289 if (unlikely(exp == 0x7ff)) { 1290 /* NaN or Inf. */ 1291 goto overflow; 1292 } 1293 1294 /* Round and re-extract the exponent. */ 1295 f = float64_round_to_int(f, fpst); 1296 exp = extract64(f, 52, 11); 1297 1298 /* Validate the range of the result. */ 1299 if (exp < 1022 + intsize) { 1300 /* abs(F) <= INT{N}_MAX */ 1301 return f; 1302 } 1303 if (exp == 1022 + intsize) { 1304 uint64_t sign = extract64(f, 63, 1); 1305 uint64_t frac = extract64(f, 0, 52); 1306 if (sign && frac == 0) { 1307 /* F == INT{N}_MIN */ 1308 return f; 1309 } 1310 } 1311 1312 overflow: 1313 /* 1314 * Raise Invalid and return INT{N}_MIN as a float. Revert any 1315 * inexact exception float64_round_to_int may have raised. 1316 */ 1317 set_float_exception_flags(old_flags | float_flag_invalid, fpst); 1318 return (uint64_t)(0x800 + 1022 + intsize) << 52; 1319 } 1320 1321 float64 HELPER(frint32_d)(float64 f, float_status *fpst) 1322 { 1323 return frint_d(f, fpst, 32); 1324 } 1325 1326 float64 HELPER(frint64_d)(float64 f, float_status *fpst) 1327 { 1328 return frint_d(f, fpst, 64); 1329 } 1330 1331 void HELPER(check_hcr_el2_trap)(CPUARMState *env, uint32_t rt, uint32_t reg) 1332 { 1333 uint32_t syndrome; 1334 1335 switch (reg) { 1336 case ARM_VFP_MVFR0: 1337 case ARM_VFP_MVFR1: 1338 case ARM_VFP_MVFR2: 1339 if (!(arm_hcr_el2_eff(env) & HCR_TID3)) { 1340 return; 1341 } 1342 break; 1343 case ARM_VFP_FPSID: 1344 if (!(arm_hcr_el2_eff(env) & HCR_TID0)) { 1345 return; 1346 } 1347 break; 1348 default: 1349 g_assert_not_reached(); 1350 } 1351 1352 syndrome = ((EC_FPIDTRAP << ARM_EL_EC_SHIFT) 1353 | ARM_EL_IL 1354 | (1 << 24) | (0xe << 20) | (7 << 14) 1355 | (reg << 10) | (rt << 5) | 1); 1356 1357 raise_exception(env, EXCP_HYP_TRAP, syndrome, 2); 1358 } 1359 1360 uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env) 1361 { 1362 return vfp_get_fpscr(env); 1363 } 1364 1365 void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) 1366 { 1367 vfp_set_fpscr(env, val); 1368 } 1369