Lines Matching +full:- +full:- +full:-

5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
9 * the SoftFloat-2a license
11 * GPL-v2-or-later
14 * taken to be licensed under the Softfloat-2a license unless specifically
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
79 * version 2 or later. See the COPYING file in the top-level directory.
89 /*----------------------------------------------------------------------------
90 | Primitive arithmetic functions, including multi-word arithmetic, and
93 *----------------------------------------------------------------------------*/
94 #include "fpu/softfloat-macros.h"
103 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
109 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
110 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
119 * raised in floating-point workloads.
121 * We optimize the code further by deferring to soft-fp whenever FP exception
143 if (likely(!s->flush_inputs_to_zero)) { \ in GEN_INPUT_FLUSH__NOCHECK()
156 if (likely(!s->flush_inputs_to_zero)) { \
170 if (likely(!s->flush_inputs_to_zero)) { \
207 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
221 # warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
235 return likely(s->float_exception_flags & float_flag_inexact &&
236 s->float_rounding_mode == float_round_nearest_even);
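
The hardfloat logic sketched by the comments above boils down to: use the host FPU only while no new exception information can be lost, i.e. while the inexact flag is already set and the rounding mode is round-to-nearest-even, and fall back to the soft-float path otherwise. A minimal sketch of that shape, using hypothetical names (Status, soft_add_float32) rather than QEMU's real types:

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct {
        uint8_t exception_flags;    /* hypothetical stand-in for float_status */
        uint8_t rounding_mode;
    } Status;

    enum { FLAG_INEXACT = 1, ROUND_NEAREST_EVEN = 0 };

    float soft_add_float32(float a, float b, Status *s); /* assumed soft-float fallback */

    static inline bool can_use_host_fpu(const Status *s)
    {
        /* mirrors the condition shown above: inexact already recorded,
         * default rounding mode in effect */
        return (s->exception_flags & FLAG_INEXACT)
            && s->rounding_mode == ROUND_NEAREST_EVEN;
    }

    static float add_float32(float a, float b, Status *s)
    {
        if (can_use_host_fpu(s)) {
            return a + b;                     /* fast path on the host FPU */
        }
        return soft_add_float32(a, b, s);     /* slow path: full soft-float emulation */
    }

QEMU's actual fast paths also verify that both inputs are zero or normal (the is-zero-or-normal helpers just below) before trusting the host result.
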
269 /* 2-input is-zero-or-normal */
294 /* 3-input is-zero-or-normal */
411 float_class_denormal, /* input was a non-squashed denormal */
436 /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
438 /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
441 * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
481 * The fraction words are stored in big-endian word ordering,
516 uint64_t frac_hm; /* high-middle */
517 uint64_t frac_lm; /* low-middle */
553 .exp_bias = ((1 << E) - 1) >> 1, \
554 .exp_re_bias = (1 << (E - 1)) + (1 << (E - 2)), \
555 .exp_max = (1 << E) - 1
560 .frac_shift = (-F - 1) & 63, \
561 .round_mask = (1ull << ((-F - 1) & 63)) - 1
592 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
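
Worked numbers for the parameter macros above: (-F - 1) & 63 is just 63 - F, i.e. the left shift that places the F explicit fraction bits just below bit 63 of a 64-bit word, and round_mask covers exactly the bits that shift drops again on the way back out. A compile-time check of the values for binary32 (E = 8, F = 23) and binary64 (E = 11, F = 52):

    #include <stdint.h>

    /* binary32: E = 8, F = 23 */
    _Static_assert((((1 << 8) - 1) >> 1) == 127, "float32 exp_bias");
    _Static_assert(((1 << 8) - 1)        == 255, "float32 exp_max");
    _Static_assert(((-23 - 1) & 63)      == 40,  "float32 frac_shift");
    _Static_assert(((1ull << ((-23 - 1) & 63)) - 1) == 0xffffffffffull, "float32 round_mask");

    /* binary64: E = 11, F = 52 */
    _Static_assert((((1 << 11) - 1) >> 1) == 1023, "float64 exp_bias");
    _Static_assert(((-52 - 1) & 63)       == 11,   "float64 frac_shift");
    _Static_assert(((1ull << ((-52 - 1) & 63)) - 1) == 0x7ff, "float64 round_mask");
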
606 const int f_size = fmt->frac_size; in unpack_raw64()
607 const int e_size = fmt->exp_size; in unpack_raw64()
649 const int f_size = float128_params.frac_size - 64; in float128_unpack_raw()
664 const int f_size = fmt->frac_size; in pack_raw64()
665 const int e_size = fmt->exp_size; in pack_raw64()
668 ret = (uint64_t)p->sign << (f_size + e_size); in pack_raw64()
669 ret = deposit64(ret, f_size, e_size, p->exp); in pack_raw64()
670 ret = deposit64(ret, 0, f_size, p->frac); in pack_raw64()
696 const int f_size = float128_params.frac_size - 64; in float128_pack_raw()
700 hi = (uint64_t)p->sign << (f_size + e_size); in float128_pack_raw()
701 hi = deposit64(hi, f_size, e_size, p->exp); in float128_pack_raw()
702 hi = deposit64(hi, 0, f_size, p->frac_hi); in float128_pack_raw()
703 return make_float128(hi, p->frac_lo); in float128_pack_raw()
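
pack_raw64() above relies on QEMU's deposit64(value, start, length, field), which inserts the low 'length' bits of 'field' at bit position 'start'. The same packing written with plain shifts and masks, as a sketch for a hypothetical sign/exponent/fraction layout:

    #include <stdint.h>

    /* Pack sign | exponent | fraction into a raw word: the low f_size bits
     * hold the fraction, e_size exponent bits sit above them, and the sign
     * bit is on top -- the same ordering pack_raw64() produces. */
    static uint64_t pack_fields(uint64_t sign, uint64_t exp, uint64_t frac,
                                int e_size, int f_size)
    {
        uint64_t ret = sign << (f_size + e_size);
        ret |= (exp  & ((1ull << e_size) - 1)) << f_size;
        ret |= frac & ((1ull << f_size) - 1);
        return ret;
    }

For example, pack_fields(1, 0x7f, 0, 8, 23) yields 0xbf800000, the binary32 encoding of -1.0.
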
706 /*----------------------------------------------------------------------------
711 | are propagated from function inputs to output. These details are target-
713 *----------------------------------------------------------------------------*/
714 #include "softfloat-specialize.c.inc"
934 * Helper functions for softfloat-parts.c.inc, per-size operations.
948 return uadd64_overflow(a->frac, b->frac, &r->frac); in frac64_add()
954 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c); in frac128_add()
955 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c); in frac128_add()
962 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c); in frac256_add()
963 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c); in frac256_add()
964 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c); in frac256_add()
965 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c); in frac256_add()
973 return uadd64_overflow(a->frac, c, &r->frac); in frac64_addi()
978 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo); in frac128_addi()
979 return uadd64_overflow(a->frac_hi, c, &r->frac_hi); in frac128_addi()
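
The multi-word additions above chain QEMU's uadd64_carry() helper: each call adds two 64-bit words plus the incoming carry and updates the carry for the next, more-significant word. A portable sketch of what such a primitive does (the real helper may map to compiler builtins):

    #include <stdbool.h>
    #include <stdint.h>

    static inline uint64_t uadd64_carry_sketch(uint64_t a, uint64_t b, bool *pcarry)
    {
        uint64_t r = a + b + *pcarry;
        /* Carry out iff the true sum does not fit in 64 bits: with carry-in 1
         * that shows up as r <= a, with carry-in 0 as r < a. */
        *pcarry = *pcarry ? r <= a : r < a;
        return r;
    }

frac256_add() then simply applies this from frac_lo up to frac_hi, exactly as listed.
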
986 a->frac = -1; in frac64_allones()
991 a->frac_hi = a->frac_lo = -1; in frac128_allones()
998 return (a->frac == b->frac ? float_relation_equal in frac64_cmp()
999 : a->frac < b->frac ? float_relation_less in frac64_cmp()
1005 uint64_t ta = a->frac_hi, tb = b->frac_hi; in frac128_cmp()
1007 ta = a->frac_lo, tb = b->frac_lo; in frac128_cmp()
1019 a->frac = 0; in frac64_clear()
1024 a->frac_hi = a->frac_lo = 0; in frac128_clear()
1035 * We want a 2*N / N-bit division to produce exactly an N-bit in frac64_div()
1038 * then division would produce an (N-1)-bit result; shift A left in frac64_div()
1039 by one to produce an N-bit result, and return true to in frac64_div()
1045 ret = a->frac < b->frac; in frac64_div()
1047 n0 = a->frac; in frac64_div()
1050 n0 = a->frac >> 1; in frac64_div()
1051 n1 = a->frac << 63; in frac64_div()
1053 q = udiv_qrnnd(&r, n0, n1, b->frac); in frac64_div()
1056 a->frac = q | (r != 0); in frac64_div()
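
The pre-shift in frac64_div() above keeps the quotient at exactly N bits: with both fractions normalized, (A << 63) / B already has its top bit set when A >= B, while A < B needs the extra shift (and an exponent adjustment by the caller). A sketch of the same computation using a 128-bit intermediate (the unsigned __int128 extension) instead of udiv_qrnnd():

    #include <stdbool.h>
    #include <stdint.h>

    /* a and b are normalized fractions (bit 63 set).  Returns true when the
     * extra pre-shift was needed; *q receives a 64-bit quotient with any
     * non-zero remainder jammed into its least-significant (sticky) bit. */
    static bool frac_div_sketch(uint64_t a, uint64_t b, uint64_t *q)
    {
        bool pre_shift = a < b;
        unsigned __int128 n = (unsigned __int128)a << (pre_shift ? 64 : 63);
        *q = (uint64_t)(n / b) | ((n % b) != 0);
        return pre_shift;
    }
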
1067 a0 = a->frac_hi, a1 = a->frac_lo; in frac128_div()
1068 b0 = b->frac_hi, b1 = b->frac_lo; in frac128_div()
1076 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */ in frac128_div()
1081 * Reduce quotient and increase remainder until remainder is non-negative. in frac128_div()
1087 q0--; in frac128_div()
1096 q1--; in frac128_div()
1103 a->frac_hi = q0; in frac128_div()
1104 a->frac_lo = q1; in frac128_div()
1112 return a->frac == 0; in frac64_eqz()
1117 return (a->frac_hi | a->frac_lo) == 0; in frac128_eqz()
1124 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac); in frac64_mulw()
1129 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo, in frac128_mulw()
1130 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo); in frac128_mulw()
1137 a->frac = -a->frac; in frac64_neg()
1143 a->frac_lo = usub64_borrow(0, a->frac_lo, &c); in frac128_neg()
1144 a->frac_hi = usub64_borrow(0, a->frac_hi, &c); in frac128_neg()
1150 a->frac_lo = usub64_borrow(0, a->frac_lo, &c); in frac256_neg()
1151 a->frac_lm = usub64_borrow(0, a->frac_lm, &c); in frac256_neg()
1152 a->frac_hm = usub64_borrow(0, a->frac_hm, &c); in frac256_neg()
1153 a->frac_hi = usub64_borrow(0, a->frac_hi, &c); in frac256_neg()
1160 if (a->frac) { in frac64_normalize()
1161 int shift = clz64(a->frac); in frac64_normalize()
1162 a->frac <<= shift; in frac64_normalize()
1170 if (a->frac_hi) { in frac128_normalize()
1171 int shl = clz64(a->frac_hi); in frac128_normalize()
1172 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl); in frac128_normalize()
1173 a->frac_lo <<= shl; in frac128_normalize()
1175 } else if (a->frac_lo) { in frac128_normalize()
1176 int shl = clz64(a->frac_lo); in frac128_normalize()
1177 a->frac_hi = a->frac_lo << shl; in frac128_normalize()
1178 a->frac_lo = 0; in frac128_normalize()
1186 uint64_t a0 = a->frac_hi, a1 = a->frac_hm; in frac256_normalize()
1187 uint64_t a2 = a->frac_lm, a3 = a->frac_lo; in frac256_normalize()
1224 a->frac_hi = a0; in frac256_normalize()
1225 a->frac_hm = a1; in frac256_normalize()
1226 a->frac_lm = a2; in frac256_normalize()
1227 a->frac_lo = a3; in frac256_normalize()
1236 int exp_diff = a->exp - b->exp; in frac64_modrem()
1239 a0 = a->frac; in frac64_modrem()
1242 if (exp_diff < -1) { in frac64_modrem()
1248 if (exp_diff == -1) { in frac64_modrem()
1253 b0 = b->frac; in frac64_modrem()
1256 a0 -= b0; in frac64_modrem()
1259 exp_diff -= 64; in frac64_modrem()
1262 q = q > 2 ? q - 2 : 0; in frac64_modrem()
1266 exp_diff -= 62; in frac64_modrem()
1273 q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0; in frac64_modrem()
1274 mul64To128(b0, q << (64 - exp_diff), &t0, &t1); in frac64_modrem()
1276 shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1); in frac64_modrem()
1295 a->sign = !a->sign; in frac64_modrem()
1308 a->cls = float_class_zero; in frac64_modrem()
1312 a->exp = b->exp + exp_diff - shift; in frac64_modrem()
1313 a->frac = a0 | (a1 != 0); in frac64_modrem()
1320 int exp_diff = a->exp - b->exp; in frac128_modrem()
1323 a0 = a->frac_hi; in frac128_modrem()
1324 a1 = a->frac_lo; in frac128_modrem()
1327 if (exp_diff < -1) { in frac128_modrem()
1333 if (exp_diff == -1) { in frac128_modrem()
1338 b0 = b->frac_hi; in frac128_modrem()
1339 b1 = b->frac_lo; in frac128_modrem()
1346 exp_diff -= 64; in frac128_modrem()
1349 q = q > 4 ? q - 4 : 0; in frac128_modrem()
1353 exp_diff -= 61; in frac128_modrem()
1360 q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0; in frac128_modrem()
1361 mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2); in frac128_modrem()
1363 shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2); in frac128_modrem()
1384 a->sign = !a->sign; in frac128_modrem()
1402 a->cls = float_class_zero; in frac128_modrem()
1406 a->exp = b->exp + exp_diff - shift; in frac128_modrem()
1407 a->frac_hi = a0; in frac128_modrem()
1408 a->frac_lo = a1 | (a2 != 0); in frac128_modrem()
1415 a->frac <<= c; in frac64_shl()
1420 uint64_t a0 = a->frac_hi, a1 = a->frac_lo; in frac128_shl()
1432 a->frac_hi = a0; in frac128_shl()
1433 a->frac_lo = a1; in frac128_shl()
1440 a->frac >>= c; in frac64_shr()
1445 uint64_t a0 = a->frac_hi, a1 = a->frac_lo; in frac128_shr()
1457 a->frac_hi = a0; in frac128_shr()
1458 a->frac_lo = a1; in frac128_shr()
1465 uint64_t a0 = a->frac; in frac64_shrjam()
1473 a->frac = a0; in frac64_shrjam()
1479 uint64_t a0 = a->frac_hi, a1 = a->frac_lo; in frac128_shrjam()
1505 a->frac_lo = a1 | (sticky != 0); in frac128_shrjam()
1506 a->frac_hi = a0; in frac128_shrjam()
1511 uint64_t a0 = a->frac_hi, a1 = a->frac_hm; in frac256_shrjam()
1512 uint64_t a2 = a->frac_lm, a3 = a->frac_lo; in frac256_shrjam()
1545 a->frac_lo = a3 | (sticky != 0); in frac256_shrjam()
1546 a->frac_lm = a2; in frac256_shrjam()
1547 a->frac_hm = a1; in frac256_shrjam()
1548 a->frac_hi = a0; in frac256_shrjam()
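
The *_shrjam() helpers above implement the classic "shift right and jam": any bits shifted out are OR'd into the least-significant bit of the result, so later rounding can still tell that something non-zero was discarded. A single-word sketch of the idea:

    #include <stdint.h>

    static uint64_t shr_jam64(uint64_t x, unsigned n)
    {
        if (n == 0) {
            return x;
        }
        if (n >= 64) {
            return x != 0;      /* everything shifted out: keep only a sticky bit */
        }
        uint64_t sticky = (x & ((1ull << n) - 1)) != 0;
        return (x >> n) | sticky;
    }
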
1555 return usub64_overflow(a->frac, b->frac, &r->frac); in frac64_sub()
1561 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c); in frac128_sub()
1562 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c); in frac128_sub()
1569 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c); in frac256_sub()
1570 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c); in frac256_sub()
1571 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c); in frac256_sub()
1572 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c); in frac256_sub()
1580 r->frac = a->frac_hi | (a->frac_lo != 0); in frac64_truncjam()
1585 r->frac_hi = a->frac_hi; in frac128_truncjam()
1586 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0); in frac128_truncjam()
1593 r->frac_hi = a->frac; in frac64_widen()
1594 r->frac_lo = 0; in frac64_widen()
1599 r->frac_hi = a->frac_hi; in frac128_widen()
1600 r->frac_hm = a->frac_lo; in frac128_widen()
1601 r->frac_lm = 0; in frac128_widen()
1602 r->frac_lo = 0; in frac128_widen()
1608 * Reciprocal sqrt table. 1 bit of exponent, 6 bits of mantissa.
1609 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
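
With 1 exponent bit and 6 fraction bits the table has 128 entries. In the musl sqrt code referenced above the index for a binary64 input is taken from bits 46..52 of the raw representation, i.e. the exponent's low (parity) bit sitting above the top six fraction bits; a hedged sketch of that indexing:

    #include <stdint.h>

    /* Index a 128-entry reciprocal-sqrt table from raw binary64 bits:
     * one exponent bit (even/odd exponent) above six fraction bits.
     * Bit offsets here follow the musl source cited above. */
    static unsigned rsqrt_index(uint64_t ix)
    {
        return (ix >> 46) & 127;  /* bits 46..51: top fraction bits, bit 52: exp LSB */
    }
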
1638 #include "softfloat-parts-addsub.c.inc"
1639 #include "softfloat-parts.c.inc"
1646 #include "softfloat-parts-addsub.c.inc"
1647 #include "softfloat-parts.c.inc"
1653 #include "softfloat-parts-addsub.c.inc"
1744 switch (p->cls) { in float64r32_round_pack_canonical()
1747 if (unlikely(p->exp == 0)) { in float64r32_round_pack_canonical()
1753 p->exp = (float32_params.frac_shift - in float64r32_round_pack_canonical()
1754 float32_params.exp_bias - shift + 1 + in float64r32_round_pack_canonical()
1758 frac_shl(p, float32_params.frac_shift - float64_params.frac_shift); in float64r32_round_pack_canonical()
1759 p->exp += float64_params.exp_bias - float32_params.exp_bias; in float64r32_round_pack_canonical()
1764 frac_shl(p, float32_params.frac_shift - float64_params.frac_shift); in float64r32_round_pack_canonical()
1765 p->exp = float64_params.exp_max; in float64r32_round_pack_canonical()
1768 p->exp = float64_params.exp_max; in float64r32_round_pack_canonical()
1798 switch (s->floatx80_rounding_precision) { in floatx80_unpack_canonical()
1814 if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) { in floatx80_unpack_canonical()
1818 p->frac_hi &= MAKE_64BIT_MASK(0, 63); in floatx80_unpack_canonical()
1819 p->cls = (p->frac_hi == 0 ? float_class_inf in floatx80_unpack_canonical()
1820 : parts_is_snan_frac(p->frac_hi, s) in floatx80_unpack_canonical()
1829 const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision]; in floatx80_round_pack_canonical()
1833 switch (p->cls) { in floatx80_round_pack_canonical()
1836 if (s->floatx80_rounding_precision == floatx80_precision_x) { in floatx80_round_pack_canonical()
1838 frac = p->frac_hi; in floatx80_round_pack_canonical()
1839 exp = p->exp; in floatx80_round_pack_canonical()
1843 p64.sign = p->sign; in floatx80_round_pack_canonical()
1844 p64.exp = p->exp; in floatx80_round_pack_canonical()
1850 if (exp != fmt->exp_max) { in floatx80_round_pack_canonical()
1853 /* rounded to inf -- fall through to set frac correctly */ in floatx80_round_pack_canonical()
1857 frac = s->floatx80_behaviour & floatx80_default_inf_int_bit_is_zero ? in floatx80_round_pack_canonical()
1859 exp = fmt->exp_max; in floatx80_round_pack_canonical()
1870 frac = p->frac_hi | (1ull << 63); in floatx80_round_pack_canonical()
1871 exp = fmt->exp_max; in floatx80_round_pack_canonical()
1878 return packFloatx80(p->sign, exp, frac); in floatx80_round_pack_canonical()
1958 return a - b; in hard_f32_sub()
1968 return a - b; in hard_f64_sub()
2229 * Fused multiply-add
2320 uc.h = -uc.h; in float32_muladd()
2328 ua.h = -ua.h; in float32_muladd()
2331 uc.h = -uc.h; in float32_muladd()
2388 uc.h = -uc.h; in float64_muladd()
2396 ua.h = -ua.h; in float64_muladd()
2399 uc.h = -uc.h; in float64_muladd()
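
The sign flips above apply the muladd negation flags on the host fast path: negate c and/or the product before a single fused multiply-add, and optionally negate the result. A sketch of that shape with hypothetical flag names and values (QEMU's real flags live in softfloat.h):

    #include <math.h>

    enum {                          /* hypothetical flag values for the sketch */
        SK_MULADD_NEGATE_C       = 1,
        SK_MULADD_NEGATE_PRODUCT = 2,
        SK_MULADD_NEGATE_RESULT  = 4,
    };

    static float muladd_sketch(float a, float b, float c, int flags)
    {
        if (flags & SK_MULADD_NEGATE_C) {
            c = -c;
        }
        if (flags & SK_MULADD_NEGATE_PRODUCT) {
            a = -a;                 /* negating one factor negates the product */
        }
        float r = fmaf(a, b, c);    /* single rounding of a * b + c */
        return (flags & SK_MULADD_NEGATE_RESULT) ? -r : r;
    }
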
2643 * Returns the remainder of the extended double-precision floating-point value
2646 * Standard for Binary Floating-Point Arithmetic. If 'mod' is true, return
2683 * Binary Floating-Point Arithmetic.
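
The 'mod' distinction above is the difference between fmod-style and IEEE remainder semantics: fmod truncates the quotient toward zero, while the IEEE remainder rounds it to the nearest integer (ties to even), so the result can be negative even for positive operands. In standard C terms:

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        /* 7 / 2 = 3.5: fmod truncates the quotient to 3, IEEE remainder
         * rounds it to 4 (nearest even), so the results differ. */
        printf("fmod(7, 2)      = %g\n", fmod(7.0, 2.0));       /* 1  */
        printf("remainder(7, 2) = %g\n", remainder(7.0, 2.0));  /* -1 */
        return 0;
    }
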
2691 switch (a->cls) { in parts_float_to_ahp()
2701 a->cls = float_class_zero; in parts_float_to_ahp()
2710 a->cls = float_class_normal; in parts_float_to_ahp()
2711 a->exp = float16_params_ahp.exp_max; in parts_float_to_ahp()
2712 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift, in parts_float_to_ahp()
2730 if (is_nan(a->cls)) { in parts64_float_to_float()
2733 if (a->cls == float_class_denormal) { in parts64_float_to_float()
2740 if (is_nan(a->cls)) { in parts128_float_to_float()
2743 if (a->cls == float_class_denormal) { in parts128_float_to_float()
2754 a->cls = b->cls; in parts_float_to_float_narrow()
2755 a->sign = b->sign; in parts_float_to_float_narrow()
2756 a->exp = b->exp; in parts_float_to_float_narrow()
2758 switch (a->cls) { in parts_float_to_float_narrow()
2768 a->frac = b->frac_hi; in parts_float_to_float_narrow()
2779 a->cls = b->cls; in parts_float_to_float_widen()
2780 a->sign = b->sign; in parts_float_to_float_widen()
2781 a->exp = b->exp; in parts_float_to_float_widen()
2784 if (is_nan(a->cls)) { in parts_float_to_float_widen()
2787 if (a->cls == float_class_denormal) { in parts_float_to_float_widen()
3031 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params); in float16_round_to_int()
3040 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params); in float32_round_to_int()
3049 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params); in float64_round_to_int()
3058 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params); in bfloat16_round_to_int()
3067 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params); in float128_round_to_int()
3079 parts_round_to_int(&p, status->float_rounding_mode, 0, status, in floatx80_round_to_int()
3080 &floatx80_params[status->floatx80_rounding_precision]); in floatx80_round_to_int()
3085 * Floating-point to signed integer conversions
3260 if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) { in float128_to_int128_scalbn()
3265 int shift = 127 - p.exp; in float128_to_int128_scalbn()
3311 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s); in float16_to_int8()
3316 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s); in float16_to_int16()
3321 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s); in float16_to_int32()
3326 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s); in float16_to_int64()
3331 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s); in float32_to_int16()
3336 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s); in float32_to_int32()
3341 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s); in float32_to_int64()
3346 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s); in float64_to_int16()
3351 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s); in float64_to_int32()
3356 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s); in float64_to_int64()
3361 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s); in float128_to_int32()
3366 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s); in float128_to_int64()
3371 return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s); in float128_to_int128()
3376 return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s); in floatx80_to_int32()
3381 return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s); in floatx80_to_int64()
3456 return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s); in bfloat16_to_int8()
3461 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s); in bfloat16_to_int16()
3466 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s); in bfloat16_to_int32()
3471 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s); in bfloat16_to_int64()
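
All of the wrappers above simply forward to the corresponding *_scalbn() conversion with the status object's current rounding mode and a scale of 0; the scale argument multiplies the value by 2^scale before converting, which is how fixed-point conversions are expressed. A usage sketch, assuming the usual QEMU softfloat headers and API:

    #include "qemu/osdep.h"
    #include "fpu/softfloat.h"

    /* Convert a float64 to a fixed-point int32 with 8 fraction bits,
     * rounding toward zero. */
    static int32_t float64_to_fix8(float64 x, float_status *st)
    {
        return float64_to_int32_scalbn(x, float_round_to_zero, 8, st);
    }
    /* e.g. x = 1.5 (raw bits 0x3FF8000000000000) yields 384 == 1.5 * 2^8 */
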
3513 * Floating-point to unsigned integer conversions
3688 if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) { in float128_to_uint128_scalbn()
3700 int shift = 127 - p.exp; in float128_to_uint128_scalbn()
3718 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s); in float16_to_uint8()
3723 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); in float16_to_uint16()
3728 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); in float16_to_uint32()
3733 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); in float16_to_uint64()
3738 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); in float32_to_uint16()
3743 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); in float32_to_uint32()
3748 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); in float32_to_uint64()
3753 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); in float64_to_uint16()
3758 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); in float64_to_uint32()
3763 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); in float64_to_uint64()
3768 return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); in float128_to_uint32()
3773 return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); in float128_to_uint64()
3778 return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s); in float128_to_uint128()
3843 return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s); in bfloat16_to_uint8()
3848 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); in bfloat16_to_uint16()
3853 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); in bfloat16_to_uint32()
3858 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); in bfloat16_to_uint64()
3882 * Signed integer to floating-point conversions
4063 p.exp = 127 - shift; in int128_to_float128()
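
The "p.exp = 127 - shift" above is the usual integer-to-float normalization: after shifting the 128-bit magnitude left by its leading-zero count, the most-significant bit lands at position 127, so the unbiased exponent is 127 minus that count. The same step at 64-bit width:

    #include <stdint.h>

    /* v must be non-zero.  Afterwards, value == *frac * 2^(*exp - 63). */
    static void int64_normalize_sketch(uint64_t v, uint64_t *frac, int *exp)
    {
        int shift = __builtin_clzll(v);   /* GCC/Clang builtin */
        *frac = v << shift;               /* bit 63 now set */
        *exp = 63 - shift;
    }
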
4102 * Unsigned Integer to floating-point conversions
4287 p.exp = 127 - shift; in uint128_to_float128()
4758 parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]); in floatx80_sqrt()
4783 /*----------------------------------------------------------------------------
4785 *----------------------------------------------------------------------------*/
4832 /*----------------------------------------------------------------------------
4834 *----------------------------------------------------------------------------*/
4891 /*----------------------------------------------------------------------------
4892 | If `a' is denormal and we are in flush-to-zero mode then set the
4893 | input-denormal exception and return zero. Otherwise just return the value.
4894 *----------------------------------------------------------------------------*/
4908 if (status->flush_inputs_to_zero) { in float16_squash_input_denormal()
4921 if (status->flush_inputs_to_zero) { in float32_squash_input_denormal()
4934 if (status->flush_inputs_to_zero) { in float64_squash_input_denormal()
4947 if (status->flush_inputs_to_zero) { in bfloat16_squash_input_denormal()
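
All four squash helpers above follow the pattern described in the comment block: if flushing of input denormals is enabled and the operand has a zero exponent field with a non-zero fraction, record the input-denormal exception and return a zero of the same sign. A bit-level sketch for binary32 (the flag value is made up for the sketch):

    #include <stdint.h>

    #define SK_FLAG_INPUT_DENORMAL 0x10   /* hypothetical flag bit */

    static uint32_t squash_input_denormal32(uint32_t bits, int flush_inputs_to_zero,
                                            uint8_t *exception_flags)
    {
        uint32_t exp  = (bits >> 23) & 0xff;
        uint32_t frac = bits & 0x7fffff;

        if (flush_inputs_to_zero && exp == 0 && frac != 0) {
            *exception_flags |= SK_FLAG_INPUT_DENORMAL;
            return bits & 0x80000000u;    /* signed zero */
        }
        return bits;
    }
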
4958 /*----------------------------------------------------------------------------
4959 | Normalizes the subnormal extended double-precision floating-point value
4963 *----------------------------------------------------------------------------*/
4972 *zExpPtr = 1 - shiftCount; in normalizeFloatx80Subnormal()
4975 /*----------------------------------------------------------------------------
4976 | Takes two extended double-precision floating-point values `a' and `b', one
4979 *----------------------------------------------------------------------------*/
4994 /*----------------------------------------------------------------------------
4995 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4997 | and returns the proper extended double-precision floating-point value
4999 | rounded and packed into the extended double-precision format, with the
5006 | double-precision floating-point number.
5010 | precision of the extended double-precision format.
5015 | Floating-Point Arithmetic.
5016 *----------------------------------------------------------------------------*/
5026 roundingMode = status->float_rounding_mode; in roundAndPackFloatx80()
5060 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { in roundAndPackFloatx80()
5067 if (status->flush_to_zero) { in roundAndPackFloatx80()
5071 isTiny = status->tininess_before_rounding in roundAndPackFloatx80()
5074 shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); in roundAndPackFloatx80()
5126 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { in roundAndPackFloatx80()
5145 isTiny = status->tininess_before_rounding in roundAndPackFloatx80()
5149 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); in roundAndPackFloatx80()
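
The precision handling above is what sets floatx80 apart from the other formats: when the x87 rounding precision is reduced to 64 or 32 significant bits, roundAndPackFloatx80() rounds the 64-bit significand at that bit position before packing. A distilled round-to-nearest-even step for that case (QEMU's routine additionally handles the other rounding modes, subnormals, overflow and the inexact flag):

    #include <stdint.h>

    /* Round a normalized 64-bit significand (bit 63 set) to n significant
     * bits, round-to-nearest-even.  On carry-out the significand becomes
     * 1.000... and the exponent is incremented. */
    static uint64_t round_sig_rne(uint64_t sig, int n, int32_t *exp)
    {
        uint64_t discard_mask = (1ull << (64 - n)) - 1;  /* bits to drop */
        uint64_t half = (discard_mask >> 1) + 1;         /* 0.5 ulp of kept part */
        uint64_t rem  = sig & discard_mask;
        uint64_t kept = sig & ~discard_mask;

        if (rem > half || (rem == half && (kept & (discard_mask + 1)))) {
            kept += discard_mask + 1;                    /* round up (ties to even) */
            if (kept == 0) {                             /* carried out of bit 63 */
                kept = 1ull << 63;
                (*exp)++;
            }
        }
        return kept;
    }
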
5206 /*----------------------------------------------------------------------------
5207 | Takes an abstract floating-point value having sign `zSign', exponent
5209 | and returns the proper extended double-precision floating-point value
5213 *----------------------------------------------------------------------------*/
5225 zExp -= 64; in normalizeRoundAndPackFloatx80()
5229 zExp -= shiftCount; in normalizeRoundAndPackFloatx80()
5235 /*----------------------------------------------------------------------------
5236 | Returns the binary exponential of the single-precision floating-point value
5238 | Binary Floating-Point Arithmetic.
5242 | 1.  2^x = e^(x*ln(2))
5249 | 2.  e^x = 1 + x/1! + x^2/2! + x^3/3! + x^4/4! + x^5/5! + ... + x^n/n! + ...
5251 *----------------------------------------------------------------------------*/
5312 /*----------------------------------------------------------------------------
5313 | Rounds the extended double-precision floating-point value `a'
5315 | result as an extended double-precision floating-point value.
5317 | Floating-Point Arithmetic.
5318 *----------------------------------------------------------------------------*/