1 /* 2 * QEMU float support 3 * 4 * The code in this source file is derived from release 2a of the SoftFloat 5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and 6 * some later contributions) are provided under that license, as detailed below. 7 * It has subsequently been modified by contributors to the QEMU Project, 8 * so some portions are provided under: 9 * the SoftFloat-2a license 10 * the BSD license 11 * GPL-v2-or-later 12 * 13 * Any future contributions to this file after December 1st 2014 will be 14 * taken to be licensed under the Softfloat-2a license unless specifically 15 * indicated otherwise. 16 */ 17 18 /* 19 =============================================================================== 20 This C source file is part of the SoftFloat IEC/IEEE Floating-point 21 Arithmetic Package, Release 2a. 22 23 Written by John R. Hauser. This work was made possible in part by the 24 International Computer Science Institute, located at Suite 600, 1947 Center 25 Street, Berkeley, California 94704. Funding was partially provided by the 26 National Science Foundation under grant MIP-9311980. The original version 27 of this code was written as part of a project to build a fixed-point vector 28 processor in collaboration with the University of California at Berkeley, 29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information 30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 31 arithmetic/SoftFloat.html'. 32 33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 38 39 Derivative works are acceptable, even for commercial purposes, so long as 40 (1) they include prominent notice that the work is derivative, and (2) they 41 include prominent notice akin to these four paragraphs for those parts of 42 this code that are retained. 43 44 =============================================================================== 45 */ 46 47 /* BSD licensing: 48 * Copyright (c) 2006, Fabrice Bellard 49 * All rights reserved. 50 * 51 * Redistribution and use in source and binary forms, with or without 52 * modification, are permitted provided that the following conditions are met: 53 * 54 * 1. Redistributions of source code must retain the above copyright notice, 55 * this list of conditions and the following disclaimer. 56 * 57 * 2. Redistributions in binary form must reproduce the above copyright notice, 58 * this list of conditions and the following disclaimer in the documentation 59 * and/or other materials provided with the distribution. 60 * 61 * 3. Neither the name of the copyright holder nor the names of its contributors 62 * may be used to endorse or promote products derived from this software without 63 * specific prior written permission. 64 * 65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 75 * THE POSSIBILITY OF SUCH DAMAGE. 76 */ 77 78 /* Portions of this work are licensed under the terms of the GNU GPL, 79 * version 2 or later. See the COPYING file in the top-level directory. 80 */ 81 82 /* softfloat (and in particular the code in softfloat-specialize.h) is 83 * target-dependent and needs the TARGET_* macros. 84 */ 85 #include "qemu/osdep.h" 86 #include <math.h> 87 #include "qemu/bitops.h" 88 #include "fpu/softfloat.h" 89 90 /* We only need stdlib for abort() */ 91 92 /*---------------------------------------------------------------------------- 93 | Primitive arithmetic functions, including multi-word arithmetic, and 94 | division and square root approximations. (Can be specialized to target if 95 | desired.) 96 *----------------------------------------------------------------------------*/ 97 #include "fpu/softfloat-macros.h" 98 99 /* 100 * Hardfloat 101 * 102 * Fast emulation of guest FP instructions is challenging for two reasons. 103 * First, FP instruction semantics are similar but not identical, particularly 104 * when handling NaNs. Second, emulating at reasonable speed the guest FP 105 * exception flags is not trivial: reading the host's flags register with a 106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp], 107 * and trapping on every FP exception is not fast nor pleasant to work with. 108 * 109 * We address these challenges by leveraging the host FPU for a subset of the 110 * operations. To do this we expand on the idea presented in this paper: 111 * 112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a 113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615. 114 * 115 * The idea is thus to leverage the host FPU to (1) compute FP operations 116 * and (2) identify whether FP exceptions occurred while avoiding 117 * expensive exception flag register accesses. 118 * 119 * An important optimization shown in the paper is that given that exception 120 * flags are rarely cleared by the guest, we can avoid recomputing some flags. 121 * This is particularly useful for the inexact flag, which is very frequently 122 * raised in floating-point workloads. 123 * 124 * We optimize the code further by deferring to soft-fp whenever FP exception 125 * detection might get hairy. Two examples: (1) when at least one operand is 126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result 127 * and the result is < the minimum normal. 128 */ 129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \ 130 static inline void name(soft_t *a, float_status *s) \ 131 { \ 132 if (unlikely(soft_t ## _is_denormal(*a))) { \ 133 *a = soft_t ## _set_sign(soft_t ## _zero, \ 134 soft_t ## _is_neg(*a)); \ 135 float_raise(float_flag_input_denormal_flushed, s); \ 136 } \ 137 } 138 139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32) 140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64) 141 #undef GEN_INPUT_FLUSH__NOCHECK 142 143 #define GEN_INPUT_FLUSH1(name, soft_t) \ 144 static inline void name(soft_t *a, float_status *s) \ 145 { \ 146 if (likely(!s->flush_inputs_to_zero)) { \ 147 return; \ 148 } \ 149 soft_t ## _input_flush__nocheck(a, s); \ 150 } 151 152 GEN_INPUT_FLUSH1(float32_input_flush1, float32) 153 GEN_INPUT_FLUSH1(float64_input_flush1, float64) 154 #undef GEN_INPUT_FLUSH1 155 156 #define GEN_INPUT_FLUSH2(name, soft_t) \ 157 static inline void name(soft_t *a, soft_t *b, float_status *s) \ 158 { \ 159 if (likely(!s->flush_inputs_to_zero)) { \ 160 return; \ 161 } \ 162 soft_t ## _input_flush__nocheck(a, s); \ 163 soft_t ## _input_flush__nocheck(b, s); \ 164 } 165 166 GEN_INPUT_FLUSH2(float32_input_flush2, float32) 167 GEN_INPUT_FLUSH2(float64_input_flush2, float64) 168 #undef GEN_INPUT_FLUSH2 169 170 #define GEN_INPUT_FLUSH3(name, soft_t) \ 171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \ 172 { \ 173 if (likely(!s->flush_inputs_to_zero)) { \ 174 return; \ 175 } \ 176 soft_t ## _input_flush__nocheck(a, s); \ 177 soft_t ## _input_flush__nocheck(b, s); \ 178 soft_t ## _input_flush__nocheck(c, s); \ 179 } 180 181 GEN_INPUT_FLUSH3(float32_input_flush3, float32) 182 GEN_INPUT_FLUSH3(float64_input_flush3, float64) 183 #undef GEN_INPUT_FLUSH3 184 185 /* 186 * Choose whether to use fpclassify or float32/64_* primitives in the generated 187 * hardfloat functions. Each combination of number of inputs and float size 188 * gets its own value. 189 */ 190 #if defined(__x86_64__) 191 # define QEMU_HARDFLOAT_1F32_USE_FP 0 192 # define QEMU_HARDFLOAT_1F64_USE_FP 1 193 # define QEMU_HARDFLOAT_2F32_USE_FP 0 194 # define QEMU_HARDFLOAT_2F64_USE_FP 1 195 # define QEMU_HARDFLOAT_3F32_USE_FP 0 196 # define QEMU_HARDFLOAT_3F64_USE_FP 1 197 #else 198 # define QEMU_HARDFLOAT_1F32_USE_FP 0 199 # define QEMU_HARDFLOAT_1F64_USE_FP 0 200 # define QEMU_HARDFLOAT_2F32_USE_FP 0 201 # define QEMU_HARDFLOAT_2F64_USE_FP 0 202 # define QEMU_HARDFLOAT_3F32_USE_FP 0 203 # define QEMU_HARDFLOAT_3F64_USE_FP 0 204 #endif 205 206 /* 207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over 208 * float{32,64}_is_infinity when !USE_FP. 209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup. 210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%. 211 */ 212 #if defined(__x86_64__) || defined(__aarch64__) 213 # define QEMU_HARDFLOAT_USE_ISINF 1 214 #else 215 # define QEMU_HARDFLOAT_USE_ISINF 0 216 #endif 217 218 /* 219 * Some targets clear the FP flags before most FP operations. This prevents 220 * the use of hardfloat, since hardfloat relies on the inexact flag being 221 * already set. 222 */ 223 #if defined(TARGET_PPC) || defined(__FAST_MATH__) 224 # if defined(__FAST_MATH__) 225 # warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \ 226 IEEE implementation 227 # endif 228 # define QEMU_NO_HARDFLOAT 1 229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN 230 #else 231 # define QEMU_NO_HARDFLOAT 0 232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline)) 233 #endif 234 235 static inline bool can_use_fpu(const float_status *s) 236 { 237 if (QEMU_NO_HARDFLOAT) { 238 return false; 239 } 240 return likely(s->float_exception_flags & float_flag_inexact && 241 s->float_rounding_mode == float_round_nearest_even); 242 } 243 244 /* 245 * Hardfloat generation functions. Each operation can have two flavors: 246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for 247 * most condition checks, or native ones (e.g. fpclassify). 248 * 249 * The flavor is chosen by the callers. Instead of using macros, we rely on the 250 * compiler to propagate constants and inline everything into the callers. 251 * 252 * We only generate functions for operations with two inputs, since only 253 * these are common enough to justify consolidating them into common code. 254 */ 255 256 typedef union { 257 float32 s; 258 float h; 259 } union_float32; 260 261 typedef union { 262 float64 s; 263 double h; 264 } union_float64; 265 266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b); 267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b); 268 269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s); 270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s); 271 typedef float (*hard_f32_op2_fn)(float a, float b); 272 typedef double (*hard_f64_op2_fn)(double a, double b); 273 274 /* 2-input is-zero-or-normal */ 275 static inline bool f32_is_zon2(union_float32 a, union_float32 b) 276 { 277 if (QEMU_HARDFLOAT_2F32_USE_FP) { 278 /* 279 * Not using a temp variable for consecutive fpclassify calls ends up 280 * generating faster code. 281 */ 282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) && 283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO); 284 } 285 return float32_is_zero_or_normal(a.s) && 286 float32_is_zero_or_normal(b.s); 287 } 288 289 static inline bool f64_is_zon2(union_float64 a, union_float64 b) 290 { 291 if (QEMU_HARDFLOAT_2F64_USE_FP) { 292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) && 293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO); 294 } 295 return float64_is_zero_or_normal(a.s) && 296 float64_is_zero_or_normal(b.s); 297 } 298 299 /* 3-input is-zero-or-normal */ 300 static inline 301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c) 302 { 303 if (QEMU_HARDFLOAT_3F32_USE_FP) { 304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) && 305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) && 306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO); 307 } 308 return float32_is_zero_or_normal(a.s) && 309 float32_is_zero_or_normal(b.s) && 310 float32_is_zero_or_normal(c.s); 311 } 312 313 static inline 314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c) 315 { 316 if (QEMU_HARDFLOAT_3F64_USE_FP) { 317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) && 318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) && 319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO); 320 } 321 return float64_is_zero_or_normal(a.s) && 322 float64_is_zero_or_normal(b.s) && 323 float64_is_zero_or_normal(c.s); 324 } 325 326 static inline bool f32_is_inf(union_float32 a) 327 { 328 if (QEMU_HARDFLOAT_USE_ISINF) { 329 return isinf(a.h); 330 } 331 return float32_is_infinity(a.s); 332 } 333 334 static inline bool f64_is_inf(union_float64 a) 335 { 336 if (QEMU_HARDFLOAT_USE_ISINF) { 337 return isinf(a.h); 338 } 339 return float64_is_infinity(a.s); 340 } 341 342 static inline float32 343 float32_gen2(float32 xa, float32 xb, float_status *s, 344 hard_f32_op2_fn hard, soft_f32_op2_fn soft, 345 f32_check_fn pre, f32_check_fn post) 346 { 347 union_float32 ua, ub, ur; 348 349 ua.s = xa; 350 ub.s = xb; 351 352 if (unlikely(!can_use_fpu(s))) { 353 goto soft; 354 } 355 356 float32_input_flush2(&ua.s, &ub.s, s); 357 if (unlikely(!pre(ua, ub))) { 358 goto soft; 359 } 360 361 ur.h = hard(ua.h, ub.h); 362 if (unlikely(f32_is_inf(ur))) { 363 float_raise(float_flag_overflow, s); 364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) { 365 goto soft; 366 } 367 return ur.s; 368 369 soft: 370 return soft(ua.s, ub.s, s); 371 } 372 373 static inline float64 374 float64_gen2(float64 xa, float64 xb, float_status *s, 375 hard_f64_op2_fn hard, soft_f64_op2_fn soft, 376 f64_check_fn pre, f64_check_fn post) 377 { 378 union_float64 ua, ub, ur; 379 380 ua.s = xa; 381 ub.s = xb; 382 383 if (unlikely(!can_use_fpu(s))) { 384 goto soft; 385 } 386 387 float64_input_flush2(&ua.s, &ub.s, s); 388 if (unlikely(!pre(ua, ub))) { 389 goto soft; 390 } 391 392 ur.h = hard(ua.h, ub.h); 393 if (unlikely(f64_is_inf(ur))) { 394 float_raise(float_flag_overflow, s); 395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) { 396 goto soft; 397 } 398 return ur.s; 399 400 soft: 401 return soft(ua.s, ub.s, s); 402 } 403 404 /* 405 * Classify a floating point number. Everything above float_class_qnan 406 * is a NaN so cls >= float_class_qnan is any NaN. 407 * 408 * Note that we canonicalize denormals, so most code should treat 409 * class_normal and class_denormal identically. 410 */ 411 412 typedef enum __attribute__ ((__packed__)) { 413 float_class_unclassified, 414 float_class_zero, 415 float_class_normal, 416 float_class_denormal, /* input was a non-squashed denormal */ 417 float_class_inf, 418 float_class_qnan, /* all NaNs from here */ 419 float_class_snan, 420 } FloatClass; 421 422 #define float_cmask(bit) (1u << (bit)) 423 424 enum { 425 float_cmask_zero = float_cmask(float_class_zero), 426 float_cmask_normal = float_cmask(float_class_normal), 427 float_cmask_denormal = float_cmask(float_class_denormal), 428 float_cmask_inf = float_cmask(float_class_inf), 429 float_cmask_qnan = float_cmask(float_class_qnan), 430 float_cmask_snan = float_cmask(float_class_snan), 431 432 float_cmask_infzero = float_cmask_zero | float_cmask_inf, 433 float_cmask_anynan = float_cmask_qnan | float_cmask_snan, 434 float_cmask_anynorm = float_cmask_normal | float_cmask_denormal, 435 }; 436 437 /* Flags for parts_minmax. */ 438 enum { 439 /* Set for minimum; clear for maximum. */ 440 minmax_ismin = 1, 441 /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */ 442 minmax_isnum = 2, 443 /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */ 444 minmax_ismag = 4, 445 /* 446 * Set for the IEEE 754-2019 minimumNumber() and maximumNumber() 447 * operations. 448 */ 449 minmax_isnumber = 8, 450 }; 451 452 /* Simple helpers for checking if, or what kind of, NaN we have */ 453 static inline __attribute__((unused)) bool is_nan(FloatClass c) 454 { 455 return unlikely(c >= float_class_qnan); 456 } 457 458 static inline __attribute__((unused)) bool is_snan(FloatClass c) 459 { 460 return c == float_class_snan; 461 } 462 463 static inline __attribute__((unused)) bool is_qnan(FloatClass c) 464 { 465 return c == float_class_qnan; 466 } 467 468 /* 469 * Return true if the float_cmask has only normals in it 470 * (including input denormals that were canonicalized) 471 */ 472 static inline bool cmask_is_only_normals(int cmask) 473 { 474 return !(cmask & ~float_cmask_anynorm); 475 } 476 477 static inline bool is_anynorm(FloatClass c) 478 { 479 return float_cmask(c) & float_cmask_anynorm; 480 } 481 482 /* 483 * Structure holding all of the decomposed parts of a float. 484 * The exponent is unbiased and the fraction is normalized. 485 * 486 * The fraction words are stored in big-endian word ordering, 487 * so that truncation from a larger format to a smaller format 488 * can be done simply by ignoring subsequent elements. 489 */ 490 491 typedef struct { 492 FloatClass cls; 493 bool sign; 494 int32_t exp; 495 union { 496 /* Routines that know the structure may reference the singular name. */ 497 uint64_t frac; 498 /* 499 * Routines expanded with multiple structures reference "hi" and "lo" 500 * depending on the operation. In FloatParts64, "hi" and "lo" are 501 * both the same word and aliased here. 502 */ 503 uint64_t frac_hi; 504 uint64_t frac_lo; 505 }; 506 } FloatParts64; 507 508 typedef struct { 509 FloatClass cls; 510 bool sign; 511 int32_t exp; 512 uint64_t frac_hi; 513 uint64_t frac_lo; 514 } FloatParts128; 515 516 typedef struct { 517 FloatClass cls; 518 bool sign; 519 int32_t exp; 520 uint64_t frac_hi; 521 uint64_t frac_hm; /* high-middle */ 522 uint64_t frac_lm; /* low-middle */ 523 uint64_t frac_lo; 524 } FloatParts256; 525 526 /* These apply to the most significant word of each FloatPartsN. */ 527 #define DECOMPOSED_BINARY_POINT 63 528 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT) 529 530 /* Structure holding all of the relevant parameters for a format. 531 * exp_size: the size of the exponent field 532 * exp_bias: the offset applied to the exponent field 533 * exp_max: the maximum normalised exponent 534 * frac_size: the size of the fraction field 535 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT 536 * The following are computed based the size of fraction 537 * round_mask: bits below lsb which must be rounded 538 * The following optional modifiers are available: 539 * arm_althp: handle ARM Alternative Half Precision 540 * m68k_denormal: explicit integer bit for extended precision may be 1 541 */ 542 typedef struct { 543 int exp_size; 544 int exp_bias; 545 int exp_re_bias; 546 int exp_max; 547 int frac_size; 548 int frac_shift; 549 bool arm_althp; 550 bool m68k_denormal; 551 uint64_t round_mask; 552 } FloatFmt; 553 554 /* Expand fields based on the size of exponent and fraction */ 555 #define FLOAT_PARAMS_(E) \ 556 .exp_size = E, \ 557 .exp_bias = ((1 << E) - 1) >> 1, \ 558 .exp_re_bias = (1 << (E - 1)) + (1 << (E - 2)), \ 559 .exp_max = (1 << E) - 1 560 561 #define FLOAT_PARAMS(E, F) \ 562 FLOAT_PARAMS_(E), \ 563 .frac_size = F, \ 564 .frac_shift = (-F - 1) & 63, \ 565 .round_mask = (1ull << ((-F - 1) & 63)) - 1 566 567 static const FloatFmt float16_params = { 568 FLOAT_PARAMS(5, 10) 569 }; 570 571 static const FloatFmt float16_params_ahp = { 572 FLOAT_PARAMS(5, 10), 573 .arm_althp = true 574 }; 575 576 static const FloatFmt bfloat16_params = { 577 FLOAT_PARAMS(8, 7) 578 }; 579 580 static const FloatFmt float32_params = { 581 FLOAT_PARAMS(8, 23) 582 }; 583 584 static const FloatFmt float64_params = { 585 FLOAT_PARAMS(11, 52) 586 }; 587 588 static const FloatFmt float128_params = { 589 FLOAT_PARAMS(15, 112) 590 }; 591 592 #define FLOATX80_PARAMS(R) \ 593 FLOAT_PARAMS_(15), \ 594 .frac_size = R == 64 ? 63 : R, \ 595 .frac_shift = 0, \ 596 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1 597 598 static const FloatFmt floatx80_params[3] = { 599 [floatx80_precision_s] = { FLOATX80_PARAMS(23) }, 600 [floatx80_precision_d] = { FLOATX80_PARAMS(52) }, 601 [floatx80_precision_x] = { 602 FLOATX80_PARAMS(64), 603 #ifdef TARGET_M68K 604 .m68k_denormal = true, 605 #endif 606 }, 607 }; 608 609 /* Unpack a float to parts, but do not canonicalize. */ 610 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw) 611 { 612 const int f_size = fmt->frac_size; 613 const int e_size = fmt->exp_size; 614 615 *r = (FloatParts64) { 616 .cls = float_class_unclassified, 617 .sign = extract64(raw, f_size + e_size, 1), 618 .exp = extract64(raw, f_size, e_size), 619 .frac = extract64(raw, 0, f_size) 620 }; 621 } 622 623 static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f) 624 { 625 unpack_raw64(p, &float16_params, f); 626 } 627 628 static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f) 629 { 630 unpack_raw64(p, &bfloat16_params, f); 631 } 632 633 static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f) 634 { 635 unpack_raw64(p, &float32_params, f); 636 } 637 638 static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f) 639 { 640 unpack_raw64(p, &float64_params, f); 641 } 642 643 static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f) 644 { 645 *p = (FloatParts128) { 646 .cls = float_class_unclassified, 647 .sign = extract32(f.high, 15, 1), 648 .exp = extract32(f.high, 0, 15), 649 .frac_hi = f.low 650 }; 651 } 652 653 static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f) 654 { 655 const int f_size = float128_params.frac_size - 64; 656 const int e_size = float128_params.exp_size; 657 658 *p = (FloatParts128) { 659 .cls = float_class_unclassified, 660 .sign = extract64(f.high, f_size + e_size, 1), 661 .exp = extract64(f.high, f_size, e_size), 662 .frac_hi = extract64(f.high, 0, f_size), 663 .frac_lo = f.low, 664 }; 665 } 666 667 /* Pack a float from parts, but do not canonicalize. */ 668 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt) 669 { 670 const int f_size = fmt->frac_size; 671 const int e_size = fmt->exp_size; 672 uint64_t ret; 673 674 ret = (uint64_t)p->sign << (f_size + e_size); 675 ret = deposit64(ret, f_size, e_size, p->exp); 676 ret = deposit64(ret, 0, f_size, p->frac); 677 return ret; 678 } 679 680 static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p) 681 { 682 return make_float16(pack_raw64(p, &float16_params)); 683 } 684 685 static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p) 686 { 687 return pack_raw64(p, &bfloat16_params); 688 } 689 690 static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p) 691 { 692 return make_float32(pack_raw64(p, &float32_params)); 693 } 694 695 static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p) 696 { 697 return make_float64(pack_raw64(p, &float64_params)); 698 } 699 700 static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p) 701 { 702 const int f_size = float128_params.frac_size - 64; 703 const int e_size = float128_params.exp_size; 704 uint64_t hi; 705 706 hi = (uint64_t)p->sign << (f_size + e_size); 707 hi = deposit64(hi, f_size, e_size, p->exp); 708 hi = deposit64(hi, 0, f_size, p->frac_hi); 709 return make_float128(hi, p->frac_lo); 710 } 711 712 /*---------------------------------------------------------------------------- 713 | Functions and definitions to determine: (1) whether tininess for underflow 714 | is detected before or after rounding by default, (2) what (if anything) 715 | happens when exceptions are raised, (3) how signaling NaNs are distinguished 716 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs 717 | are propagated from function inputs to output. These details are target- 718 | specific. 719 *----------------------------------------------------------------------------*/ 720 #include "softfloat-specialize.c.inc" 721 722 #define PARTS_GENERIC_64_128(NAME, P) \ 723 _Generic((P), FloatParts64 *: parts64_##NAME, \ 724 FloatParts128 *: parts128_##NAME) 725 726 #define PARTS_GENERIC_64_128_256(NAME, P) \ 727 _Generic((P), FloatParts64 *: parts64_##NAME, \ 728 FloatParts128 *: parts128_##NAME, \ 729 FloatParts256 *: parts256_##NAME) 730 731 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S) 732 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S) 733 734 static void parts64_return_nan(FloatParts64 *a, float_status *s); 735 static void parts128_return_nan(FloatParts128 *a, float_status *s); 736 737 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S) 738 739 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b, 740 float_status *s); 741 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b, 742 float_status *s); 743 744 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S) 745 746 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b, 747 FloatParts64 *c, float_status *s, 748 int ab_mask, int abc_mask); 749 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a, 750 FloatParts128 *b, 751 FloatParts128 *c, 752 float_status *s, 753 int ab_mask, int abc_mask); 754 755 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \ 756 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM) 757 758 static void parts64_canonicalize(FloatParts64 *p, float_status *status, 759 const FloatFmt *fmt); 760 static void parts128_canonicalize(FloatParts128 *p, float_status *status, 761 const FloatFmt *fmt); 762 763 #define parts_canonicalize(A, S, F) \ 764 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F) 765 766 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status, 767 const FloatFmt *fmt); 768 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status, 769 const FloatFmt *fmt); 770 771 #define parts_uncanon_normal(A, S, F) \ 772 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F) 773 774 static void parts64_uncanon(FloatParts64 *p, float_status *status, 775 const FloatFmt *fmt); 776 static void parts128_uncanon(FloatParts128 *p, float_status *status, 777 const FloatFmt *fmt); 778 779 #define parts_uncanon(A, S, F) \ 780 PARTS_GENERIC_64_128(uncanon, A)(A, S, F) 781 782 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b); 783 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b); 784 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b); 785 786 #define parts_add_normal(A, B) \ 787 PARTS_GENERIC_64_128_256(add_normal, A)(A, B) 788 789 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b); 790 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b); 791 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b); 792 793 #define parts_sub_normal(A, B) \ 794 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B) 795 796 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b, 797 float_status *s, bool subtract); 798 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b, 799 float_status *s, bool subtract); 800 801 #define parts_addsub(A, B, S, Z) \ 802 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z) 803 804 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b, 805 float_status *s); 806 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b, 807 float_status *s); 808 809 #define parts_mul(A, B, S) \ 810 PARTS_GENERIC_64_128(mul, A)(A, B, S) 811 812 static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b, 813 FloatParts64 *c, int scale, 814 int flags, float_status *s); 815 static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b, 816 FloatParts128 *c, int scale, 817 int flags, float_status *s); 818 819 #define parts_muladd_scalbn(A, B, C, Z, Y, S) \ 820 PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S) 821 822 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b, 823 float_status *s); 824 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b, 825 float_status *s); 826 827 #define parts_div(A, B, S) \ 828 PARTS_GENERIC_64_128(div, A)(A, B, S) 829 830 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b, 831 uint64_t *mod_quot, float_status *s); 832 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b, 833 uint64_t *mod_quot, float_status *s); 834 835 #define parts_modrem(A, B, Q, S) \ 836 PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S) 837 838 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f); 839 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f); 840 841 #define parts_sqrt(A, S, F) \ 842 PARTS_GENERIC_64_128(sqrt, A)(A, S, F) 843 844 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm, 845 int scale, int frac_size); 846 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r, 847 int scale, int frac_size); 848 849 #define parts_round_to_int_normal(A, R, C, F) \ 850 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F) 851 852 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm, 853 int scale, float_status *s, 854 const FloatFmt *fmt); 855 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r, 856 int scale, float_status *s, 857 const FloatFmt *fmt); 858 859 #define parts_round_to_int(A, R, C, S, F) \ 860 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F) 861 862 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode, 863 int scale, int64_t min, int64_t max, 864 float_status *s); 865 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode, 866 int scale, int64_t min, int64_t max, 867 float_status *s); 868 869 #define parts_float_to_sint(P, R, Z, MN, MX, S) \ 870 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S) 871 872 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode, 873 int scale, uint64_t max, 874 float_status *s); 875 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode, 876 int scale, uint64_t max, 877 float_status *s); 878 879 #define parts_float_to_uint(P, R, Z, M, S) \ 880 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S) 881 882 static int64_t parts64_float_to_sint_modulo(FloatParts64 *p, 883 FloatRoundMode rmode, 884 int bitsm1, float_status *s); 885 static int64_t parts128_float_to_sint_modulo(FloatParts128 *p, 886 FloatRoundMode rmode, 887 int bitsm1, float_status *s); 888 889 #define parts_float_to_sint_modulo(P, R, M, S) \ 890 PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S) 891 892 static void parts64_sint_to_float(FloatParts64 *p, int64_t a, 893 int scale, float_status *s); 894 static void parts128_sint_to_float(FloatParts128 *p, int64_t a, 895 int scale, float_status *s); 896 897 #define parts_float_to_sint(P, R, Z, MN, MX, S) \ 898 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S) 899 900 #define parts_sint_to_float(P, I, Z, S) \ 901 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S) 902 903 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a, 904 int scale, float_status *s); 905 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a, 906 int scale, float_status *s); 907 908 #define parts_uint_to_float(P, I, Z, S) \ 909 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S) 910 911 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b, 912 float_status *s, int flags); 913 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b, 914 float_status *s, int flags); 915 916 #define parts_minmax(A, B, S, F) \ 917 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F) 918 919 static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b, 920 float_status *s, bool q); 921 static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b, 922 float_status *s, bool q); 923 924 #define parts_compare(A, B, S, Q) \ 925 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q) 926 927 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s); 928 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s); 929 930 #define parts_scalbn(A, N, S) \ 931 PARTS_GENERIC_64_128(scalbn, A)(A, N, S) 932 933 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f); 934 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f); 935 936 #define parts_log2(A, S, F) \ 937 PARTS_GENERIC_64_128(log2, A)(A, S, F) 938 939 /* 940 * Helper functions for softfloat-parts.c.inc, per-size operations. 941 */ 942 943 #define FRAC_GENERIC_64_128(NAME, P) \ 944 _Generic((P), FloatParts64 *: frac64_##NAME, \ 945 FloatParts128 *: frac128_##NAME) 946 947 #define FRAC_GENERIC_64_128_256(NAME, P) \ 948 _Generic((P), FloatParts64 *: frac64_##NAME, \ 949 FloatParts128 *: frac128_##NAME, \ 950 FloatParts256 *: frac256_##NAME) 951 952 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b) 953 { 954 return uadd64_overflow(a->frac, b->frac, &r->frac); 955 } 956 957 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b) 958 { 959 bool c = 0; 960 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c); 961 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c); 962 return c; 963 } 964 965 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b) 966 { 967 bool c = 0; 968 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c); 969 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c); 970 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c); 971 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c); 972 return c; 973 } 974 975 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B) 976 977 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c) 978 { 979 return uadd64_overflow(a->frac, c, &r->frac); 980 } 981 982 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c) 983 { 984 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo); 985 return uadd64_overflow(a->frac_hi, c, &r->frac_hi); 986 } 987 988 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C) 989 990 static void frac64_allones(FloatParts64 *a) 991 { 992 a->frac = -1; 993 } 994 995 static void frac128_allones(FloatParts128 *a) 996 { 997 a->frac_hi = a->frac_lo = -1; 998 } 999 1000 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A) 1001 1002 static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b) 1003 { 1004 return (a->frac == b->frac ? float_relation_equal 1005 : a->frac < b->frac ? float_relation_less 1006 : float_relation_greater); 1007 } 1008 1009 static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b) 1010 { 1011 uint64_t ta = a->frac_hi, tb = b->frac_hi; 1012 if (ta == tb) { 1013 ta = a->frac_lo, tb = b->frac_lo; 1014 if (ta == tb) { 1015 return float_relation_equal; 1016 } 1017 } 1018 return ta < tb ? float_relation_less : float_relation_greater; 1019 } 1020 1021 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B) 1022 1023 static void frac64_clear(FloatParts64 *a) 1024 { 1025 a->frac = 0; 1026 } 1027 1028 static void frac128_clear(FloatParts128 *a) 1029 { 1030 a->frac_hi = a->frac_lo = 0; 1031 } 1032 1033 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A) 1034 1035 static bool frac64_div(FloatParts64 *a, FloatParts64 *b) 1036 { 1037 uint64_t n1, n0, r, q; 1038 bool ret; 1039 1040 /* 1041 * We want a 2*N / N-bit division to produce exactly an N-bit 1042 * result, so that we do not lose any precision and so that we 1043 * do not have to renormalize afterward. If A.frac < B.frac, 1044 * then division would produce an (N-1)-bit result; shift A left 1045 * by one to produce the an N-bit result, and return true to 1046 * decrement the exponent to match. 1047 * 1048 * The udiv_qrnnd algorithm that we're using requires normalization, 1049 * i.e. the msb of the denominator must be set, which is already true. 1050 */ 1051 ret = a->frac < b->frac; 1052 if (ret) { 1053 n0 = a->frac; 1054 n1 = 0; 1055 } else { 1056 n0 = a->frac >> 1; 1057 n1 = a->frac << 63; 1058 } 1059 q = udiv_qrnnd(&r, n0, n1, b->frac); 1060 1061 /* Set lsb if there is a remainder, to set inexact. */ 1062 a->frac = q | (r != 0); 1063 1064 return ret; 1065 } 1066 1067 static bool frac128_div(FloatParts128 *a, FloatParts128 *b) 1068 { 1069 uint64_t q0, q1, a0, a1, b0, b1; 1070 uint64_t r0, r1, r2, r3, t0, t1, t2, t3; 1071 bool ret = false; 1072 1073 a0 = a->frac_hi, a1 = a->frac_lo; 1074 b0 = b->frac_hi, b1 = b->frac_lo; 1075 1076 ret = lt128(a0, a1, b0, b1); 1077 if (!ret) { 1078 a1 = shr_double(a0, a1, 1); 1079 a0 = a0 >> 1; 1080 } 1081 1082 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */ 1083 q0 = estimateDiv128To64(a0, a1, b0); 1084 1085 /* 1086 * Estimate is high because B1 was not included (unless B1 == 0). 1087 * Reduce quotient and increase remainder until remainder is non-negative. 1088 * This loop will execute 0 to 2 times. 1089 */ 1090 mul128By64To192(b0, b1, q0, &t0, &t1, &t2); 1091 sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2); 1092 while (r0 != 0) { 1093 q0--; 1094 add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2); 1095 } 1096 1097 /* Repeat using the remainder, producing a second word of quotient. */ 1098 q1 = estimateDiv128To64(r1, r2, b0); 1099 mul128By64To192(b0, b1, q1, &t1, &t2, &t3); 1100 sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3); 1101 while (r1 != 0) { 1102 q1--; 1103 add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3); 1104 } 1105 1106 /* Any remainder indicates inexact; set sticky bit. */ 1107 q1 |= (r2 | r3) != 0; 1108 1109 a->frac_hi = q0; 1110 a->frac_lo = q1; 1111 return ret; 1112 } 1113 1114 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B) 1115 1116 static bool frac64_eqz(FloatParts64 *a) 1117 { 1118 return a->frac == 0; 1119 } 1120 1121 static bool frac128_eqz(FloatParts128 *a) 1122 { 1123 return (a->frac_hi | a->frac_lo) == 0; 1124 } 1125 1126 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A) 1127 1128 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b) 1129 { 1130 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac); 1131 } 1132 1133 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b) 1134 { 1135 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo, 1136 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo); 1137 } 1138 1139 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B) 1140 1141 static void frac64_neg(FloatParts64 *a) 1142 { 1143 a->frac = -a->frac; 1144 } 1145 1146 static void frac128_neg(FloatParts128 *a) 1147 { 1148 bool c = 0; 1149 a->frac_lo = usub64_borrow(0, a->frac_lo, &c); 1150 a->frac_hi = usub64_borrow(0, a->frac_hi, &c); 1151 } 1152 1153 static void frac256_neg(FloatParts256 *a) 1154 { 1155 bool c = 0; 1156 a->frac_lo = usub64_borrow(0, a->frac_lo, &c); 1157 a->frac_lm = usub64_borrow(0, a->frac_lm, &c); 1158 a->frac_hm = usub64_borrow(0, a->frac_hm, &c); 1159 a->frac_hi = usub64_borrow(0, a->frac_hi, &c); 1160 } 1161 1162 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A) 1163 1164 static int frac64_normalize(FloatParts64 *a) 1165 { 1166 if (a->frac) { 1167 int shift = clz64(a->frac); 1168 a->frac <<= shift; 1169 return shift; 1170 } 1171 return 64; 1172 } 1173 1174 static int frac128_normalize(FloatParts128 *a) 1175 { 1176 if (a->frac_hi) { 1177 int shl = clz64(a->frac_hi); 1178 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl); 1179 a->frac_lo <<= shl; 1180 return shl; 1181 } else if (a->frac_lo) { 1182 int shl = clz64(a->frac_lo); 1183 a->frac_hi = a->frac_lo << shl; 1184 a->frac_lo = 0; 1185 return shl + 64; 1186 } 1187 return 128; 1188 } 1189 1190 static int frac256_normalize(FloatParts256 *a) 1191 { 1192 uint64_t a0 = a->frac_hi, a1 = a->frac_hm; 1193 uint64_t a2 = a->frac_lm, a3 = a->frac_lo; 1194 int ret, shl; 1195 1196 if (likely(a0)) { 1197 shl = clz64(a0); 1198 if (shl == 0) { 1199 return 0; 1200 } 1201 ret = shl; 1202 } else { 1203 if (a1) { 1204 ret = 64; 1205 a0 = a1, a1 = a2, a2 = a3, a3 = 0; 1206 } else if (a2) { 1207 ret = 128; 1208 a0 = a2, a1 = a3, a2 = 0, a3 = 0; 1209 } else if (a3) { 1210 ret = 192; 1211 a0 = a3, a1 = 0, a2 = 0, a3 = 0; 1212 } else { 1213 ret = 256; 1214 a0 = 0, a1 = 0, a2 = 0, a3 = 0; 1215 goto done; 1216 } 1217 shl = clz64(a0); 1218 if (shl == 0) { 1219 goto done; 1220 } 1221 ret += shl; 1222 } 1223 1224 a0 = shl_double(a0, a1, shl); 1225 a1 = shl_double(a1, a2, shl); 1226 a2 = shl_double(a2, a3, shl); 1227 a3 <<= shl; 1228 1229 done: 1230 a->frac_hi = a0; 1231 a->frac_hm = a1; 1232 a->frac_lm = a2; 1233 a->frac_lo = a3; 1234 return ret; 1235 } 1236 1237 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A) 1238 1239 static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot) 1240 { 1241 uint64_t a0, a1, b0, t0, t1, q, quot; 1242 int exp_diff = a->exp - b->exp; 1243 int shift; 1244 1245 a0 = a->frac; 1246 a1 = 0; 1247 1248 if (exp_diff < -1) { 1249 if (mod_quot) { 1250 *mod_quot = 0; 1251 } 1252 return; 1253 } 1254 if (exp_diff == -1) { 1255 a0 >>= 1; 1256 exp_diff = 0; 1257 } 1258 1259 b0 = b->frac; 1260 quot = q = b0 <= a0; 1261 if (q) { 1262 a0 -= b0; 1263 } 1264 1265 exp_diff -= 64; 1266 while (exp_diff > 0) { 1267 q = estimateDiv128To64(a0, a1, b0); 1268 q = q > 2 ? q - 2 : 0; 1269 mul64To128(b0, q, &t0, &t1); 1270 sub128(a0, a1, t0, t1, &a0, &a1); 1271 shortShift128Left(a0, a1, 62, &a0, &a1); 1272 exp_diff -= 62; 1273 quot = (quot << 62) + q; 1274 } 1275 1276 exp_diff += 64; 1277 if (exp_diff > 0) { 1278 q = estimateDiv128To64(a0, a1, b0); 1279 q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0; 1280 mul64To128(b0, q << (64 - exp_diff), &t0, &t1); 1281 sub128(a0, a1, t0, t1, &a0, &a1); 1282 shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1); 1283 while (le128(t0, t1, a0, a1)) { 1284 ++q; 1285 sub128(a0, a1, t0, t1, &a0, &a1); 1286 } 1287 quot = (exp_diff < 64 ? quot << exp_diff : 0) + q; 1288 } else { 1289 t0 = b0; 1290 t1 = 0; 1291 } 1292 1293 if (mod_quot) { 1294 *mod_quot = quot; 1295 } else { 1296 sub128(t0, t1, a0, a1, &t0, &t1); 1297 if (lt128(t0, t1, a0, a1) || 1298 (eq128(t0, t1, a0, a1) && (q & 1))) { 1299 a0 = t0; 1300 a1 = t1; 1301 a->sign = !a->sign; 1302 } 1303 } 1304 1305 if (likely(a0)) { 1306 shift = clz64(a0); 1307 shortShift128Left(a0, a1, shift, &a0, &a1); 1308 } else if (likely(a1)) { 1309 shift = clz64(a1); 1310 a0 = a1 << shift; 1311 a1 = 0; 1312 shift += 64; 1313 } else { 1314 a->cls = float_class_zero; 1315 return; 1316 } 1317 1318 a->exp = b->exp + exp_diff - shift; 1319 a->frac = a0 | (a1 != 0); 1320 } 1321 1322 static void frac128_modrem(FloatParts128 *a, FloatParts128 *b, 1323 uint64_t *mod_quot) 1324 { 1325 uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot; 1326 int exp_diff = a->exp - b->exp; 1327 int shift; 1328 1329 a0 = a->frac_hi; 1330 a1 = a->frac_lo; 1331 a2 = 0; 1332 1333 if (exp_diff < -1) { 1334 if (mod_quot) { 1335 *mod_quot = 0; 1336 } 1337 return; 1338 } 1339 if (exp_diff == -1) { 1340 shift128Right(a0, a1, 1, &a0, &a1); 1341 exp_diff = 0; 1342 } 1343 1344 b0 = b->frac_hi; 1345 b1 = b->frac_lo; 1346 1347 quot = q = le128(b0, b1, a0, a1); 1348 if (q) { 1349 sub128(a0, a1, b0, b1, &a0, &a1); 1350 } 1351 1352 exp_diff -= 64; 1353 while (exp_diff > 0) { 1354 q = estimateDiv128To64(a0, a1, b0); 1355 q = q > 4 ? q - 4 : 0; 1356 mul128By64To192(b0, b1, q, &t0, &t1, &t2); 1357 sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2); 1358 shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2); 1359 exp_diff -= 61; 1360 quot = (quot << 61) + q; 1361 } 1362 1363 exp_diff += 64; 1364 if (exp_diff > 0) { 1365 q = estimateDiv128To64(a0, a1, b0); 1366 q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0; 1367 mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2); 1368 sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2); 1369 shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2); 1370 while (le192(t0, t1, t2, a0, a1, a2)) { 1371 ++q; 1372 sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2); 1373 } 1374 quot = (exp_diff < 64 ? quot << exp_diff : 0) + q; 1375 } else { 1376 t0 = b0; 1377 t1 = b1; 1378 t2 = 0; 1379 } 1380 1381 if (mod_quot) { 1382 *mod_quot = quot; 1383 } else { 1384 sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2); 1385 if (lt192(t0, t1, t2, a0, a1, a2) || 1386 (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) { 1387 a0 = t0; 1388 a1 = t1; 1389 a2 = t2; 1390 a->sign = !a->sign; 1391 } 1392 } 1393 1394 if (likely(a0)) { 1395 shift = clz64(a0); 1396 shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2); 1397 } else if (likely(a1)) { 1398 shift = clz64(a1); 1399 shortShift128Left(a1, a2, shift, &a0, &a1); 1400 a2 = 0; 1401 shift += 64; 1402 } else if (likely(a2)) { 1403 shift = clz64(a2); 1404 a0 = a2 << shift; 1405 a1 = a2 = 0; 1406 shift += 128; 1407 } else { 1408 a->cls = float_class_zero; 1409 return; 1410 } 1411 1412 a->exp = b->exp + exp_diff - shift; 1413 a->frac_hi = a0; 1414 a->frac_lo = a1 | (a2 != 0); 1415 } 1416 1417 #define frac_modrem(A, B, Q) FRAC_GENERIC_64_128(modrem, A)(A, B, Q) 1418 1419 static void frac64_shl(FloatParts64 *a, int c) 1420 { 1421 a->frac <<= c; 1422 } 1423 1424 static void frac128_shl(FloatParts128 *a, int c) 1425 { 1426 uint64_t a0 = a->frac_hi, a1 = a->frac_lo; 1427 1428 if (c & 64) { 1429 a0 = a1, a1 = 0; 1430 } 1431 1432 c &= 63; 1433 if (c) { 1434 a0 = shl_double(a0, a1, c); 1435 a1 = a1 << c; 1436 } 1437 1438 a->frac_hi = a0; 1439 a->frac_lo = a1; 1440 } 1441 1442 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C) 1443 1444 static void frac64_shr(FloatParts64 *a, int c) 1445 { 1446 a->frac >>= c; 1447 } 1448 1449 static void frac128_shr(FloatParts128 *a, int c) 1450 { 1451 uint64_t a0 = a->frac_hi, a1 = a->frac_lo; 1452 1453 if (c & 64) { 1454 a1 = a0, a0 = 0; 1455 } 1456 1457 c &= 63; 1458 if (c) { 1459 a1 = shr_double(a0, a1, c); 1460 a0 = a0 >> c; 1461 } 1462 1463 a->frac_hi = a0; 1464 a->frac_lo = a1; 1465 } 1466 1467 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C) 1468 1469 static void frac64_shrjam(FloatParts64 *a, int c) 1470 { 1471 uint64_t a0 = a->frac; 1472 1473 if (likely(c != 0)) { 1474 if (likely(c < 64)) { 1475 a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0); 1476 } else { 1477 a0 = a0 != 0; 1478 } 1479 a->frac = a0; 1480 } 1481 } 1482 1483 static void frac128_shrjam(FloatParts128 *a, int c) 1484 { 1485 uint64_t a0 = a->frac_hi, a1 = a->frac_lo; 1486 uint64_t sticky = 0; 1487 1488 if (unlikely(c == 0)) { 1489 return; 1490 } else if (likely(c < 64)) { 1491 /* nothing */ 1492 } else if (likely(c < 128)) { 1493 sticky = a1; 1494 a1 = a0; 1495 a0 = 0; 1496 c &= 63; 1497 if (c == 0) { 1498 goto done; 1499 } 1500 } else { 1501 sticky = a0 | a1; 1502 a0 = a1 = 0; 1503 goto done; 1504 } 1505 1506 sticky |= shr_double(a1, 0, c); 1507 a1 = shr_double(a0, a1, c); 1508 a0 = a0 >> c; 1509 1510 done: 1511 a->frac_lo = a1 | (sticky != 0); 1512 a->frac_hi = a0; 1513 } 1514 1515 static void frac256_shrjam(FloatParts256 *a, int c) 1516 { 1517 uint64_t a0 = a->frac_hi, a1 = a->frac_hm; 1518 uint64_t a2 = a->frac_lm, a3 = a->frac_lo; 1519 uint64_t sticky = 0; 1520 1521 if (unlikely(c == 0)) { 1522 return; 1523 } else if (likely(c < 64)) { 1524 /* nothing */ 1525 } else if (likely(c < 256)) { 1526 if (unlikely(c & 128)) { 1527 sticky |= a2 | a3; 1528 a3 = a1, a2 = a0, a1 = 0, a0 = 0; 1529 } 1530 if (unlikely(c & 64)) { 1531 sticky |= a3; 1532 a3 = a2, a2 = a1, a1 = a0, a0 = 0; 1533 } 1534 c &= 63; 1535 if (c == 0) { 1536 goto done; 1537 } 1538 } else { 1539 sticky = a0 | a1 | a2 | a3; 1540 a0 = a1 = a2 = a3 = 0; 1541 goto done; 1542 } 1543 1544 sticky |= shr_double(a3, 0, c); 1545 a3 = shr_double(a2, a3, c); 1546 a2 = shr_double(a1, a2, c); 1547 a1 = shr_double(a0, a1, c); 1548 a0 = a0 >> c; 1549 1550 done: 1551 a->frac_lo = a3 | (sticky != 0); 1552 a->frac_lm = a2; 1553 a->frac_hm = a1; 1554 a->frac_hi = a0; 1555 } 1556 1557 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C) 1558 1559 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b) 1560 { 1561 return usub64_overflow(a->frac, b->frac, &r->frac); 1562 } 1563 1564 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b) 1565 { 1566 bool c = 0; 1567 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c); 1568 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c); 1569 return c; 1570 } 1571 1572 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b) 1573 { 1574 bool c = 0; 1575 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c); 1576 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c); 1577 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c); 1578 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c); 1579 return c; 1580 } 1581 1582 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B) 1583 1584 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a) 1585 { 1586 r->frac = a->frac_hi | (a->frac_lo != 0); 1587 } 1588 1589 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a) 1590 { 1591 r->frac_hi = a->frac_hi; 1592 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0); 1593 } 1594 1595 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A) 1596 1597 static void frac64_widen(FloatParts128 *r, FloatParts64 *a) 1598 { 1599 r->frac_hi = a->frac; 1600 r->frac_lo = 0; 1601 } 1602 1603 static void frac128_widen(FloatParts256 *r, FloatParts128 *a) 1604 { 1605 r->frac_hi = a->frac_hi; 1606 r->frac_hm = a->frac_lo; 1607 r->frac_lm = 0; 1608 r->frac_lo = 0; 1609 } 1610 1611 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B) 1612 1613 /* 1614 * Reciprocal sqrt table. 1 bit of exponent, 6-bits of mantessa. 1615 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c 1616 * and thus MIT licenced. 1617 */ 1618 static const uint16_t rsqrt_tab[128] = { 1619 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43, 1620 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b, 1621 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1, 1622 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430, 1623 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59, 1624 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925, 1625 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479, 1626 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040, 1627 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234, 1628 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2, 1629 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1, 1630 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192, 1631 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f, 1632 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4, 1633 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59, 1634 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560, 1635 }; 1636 1637 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME) 1638 #define FloatPartsN glue(FloatParts,N) 1639 #define FloatPartsW glue(FloatParts,W) 1640 1641 #define N 64 1642 #define W 128 1643 1644 #include "softfloat-parts-addsub.c.inc" 1645 #include "softfloat-parts.c.inc" 1646 1647 #undef N 1648 #undef W 1649 #define N 128 1650 #define W 256 1651 1652 #include "softfloat-parts-addsub.c.inc" 1653 #include "softfloat-parts.c.inc" 1654 1655 #undef N 1656 #undef W 1657 #define N 256 1658 1659 #include "softfloat-parts-addsub.c.inc" 1660 1661 #undef N 1662 #undef W 1663 #undef partsN 1664 #undef FloatPartsN 1665 #undef FloatPartsW 1666 1667 /* 1668 * Pack/unpack routines with a specific FloatFmt. 1669 */ 1670 1671 static void float16a_unpack_canonical(FloatParts64 *p, float16 f, 1672 float_status *s, const FloatFmt *params) 1673 { 1674 float16_unpack_raw(p, f); 1675 parts_canonicalize(p, s, params); 1676 } 1677 1678 static void float16_unpack_canonical(FloatParts64 *p, float16 f, 1679 float_status *s) 1680 { 1681 float16a_unpack_canonical(p, f, s, &float16_params); 1682 } 1683 1684 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f, 1685 float_status *s) 1686 { 1687 bfloat16_unpack_raw(p, f); 1688 parts_canonicalize(p, s, &bfloat16_params); 1689 } 1690 1691 static float16 float16a_round_pack_canonical(FloatParts64 *p, 1692 float_status *s, 1693 const FloatFmt *params) 1694 { 1695 parts_uncanon(p, s, params); 1696 return float16_pack_raw(p); 1697 } 1698 1699 static float16 float16_round_pack_canonical(FloatParts64 *p, 1700 float_status *s) 1701 { 1702 return float16a_round_pack_canonical(p, s, &float16_params); 1703 } 1704 1705 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p, 1706 float_status *s) 1707 { 1708 parts_uncanon(p, s, &bfloat16_params); 1709 return bfloat16_pack_raw(p); 1710 } 1711 1712 static void float32_unpack_canonical(FloatParts64 *p, float32 f, 1713 float_status *s) 1714 { 1715 float32_unpack_raw(p, f); 1716 parts_canonicalize(p, s, &float32_params); 1717 } 1718 1719 static float32 float32_round_pack_canonical(FloatParts64 *p, 1720 float_status *s) 1721 { 1722 parts_uncanon(p, s, &float32_params); 1723 return float32_pack_raw(p); 1724 } 1725 1726 static void float64_unpack_canonical(FloatParts64 *p, float64 f, 1727 float_status *s) 1728 { 1729 float64_unpack_raw(p, f); 1730 parts_canonicalize(p, s, &float64_params); 1731 } 1732 1733 static float64 float64_round_pack_canonical(FloatParts64 *p, 1734 float_status *s) 1735 { 1736 parts_uncanon(p, s, &float64_params); 1737 return float64_pack_raw(p); 1738 } 1739 1740 static float64 float64r32_round_pack_canonical(FloatParts64 *p, 1741 float_status *s) 1742 { 1743 parts_uncanon(p, s, &float32_params); 1744 1745 /* 1746 * In parts_uncanon, we placed the fraction for float32 at the lsb. 1747 * We need to adjust the fraction higher so that the least N bits are 1748 * zero, and the fraction is adjacent to the float64 implicit bit. 1749 */ 1750 switch (p->cls) { 1751 case float_class_normal: 1752 case float_class_denormal: 1753 if (unlikely(p->exp == 0)) { 1754 /* 1755 * The result is denormal for float32, but can be represented 1756 * in normalized form for float64. Adjust, per canonicalize. 1757 */ 1758 int shift = frac_normalize(p); 1759 p->exp = (float32_params.frac_shift - 1760 float32_params.exp_bias - shift + 1 + 1761 float64_params.exp_bias); 1762 frac_shr(p, float64_params.frac_shift); 1763 } else { 1764 frac_shl(p, float32_params.frac_shift - float64_params.frac_shift); 1765 p->exp += float64_params.exp_bias - float32_params.exp_bias; 1766 } 1767 break; 1768 case float_class_snan: 1769 case float_class_qnan: 1770 frac_shl(p, float32_params.frac_shift - float64_params.frac_shift); 1771 p->exp = float64_params.exp_max; 1772 break; 1773 case float_class_inf: 1774 p->exp = float64_params.exp_max; 1775 break; 1776 case float_class_zero: 1777 break; 1778 default: 1779 g_assert_not_reached(); 1780 } 1781 1782 return float64_pack_raw(p); 1783 } 1784 1785 static void float128_unpack_canonical(FloatParts128 *p, float128 f, 1786 float_status *s) 1787 { 1788 float128_unpack_raw(p, f); 1789 parts_canonicalize(p, s, &float128_params); 1790 } 1791 1792 static float128 float128_round_pack_canonical(FloatParts128 *p, 1793 float_status *s) 1794 { 1795 parts_uncanon(p, s, &float128_params); 1796 return float128_pack_raw(p); 1797 } 1798 1799 /* Returns false if the encoding is invalid. */ 1800 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f, 1801 float_status *s) 1802 { 1803 /* Ensure rounding precision is set before beginning. */ 1804 switch (s->floatx80_rounding_precision) { 1805 case floatx80_precision_x: 1806 case floatx80_precision_d: 1807 case floatx80_precision_s: 1808 break; 1809 default: 1810 g_assert_not_reached(); 1811 } 1812 1813 if (unlikely(floatx80_invalid_encoding(f))) { 1814 float_raise(float_flag_invalid, s); 1815 return false; 1816 } 1817 1818 floatx80_unpack_raw(p, f); 1819 1820 if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) { 1821 parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]); 1822 } else { 1823 /* The explicit integer bit is ignored, after invalid checks. */ 1824 p->frac_hi &= MAKE_64BIT_MASK(0, 63); 1825 p->cls = (p->frac_hi == 0 ? float_class_inf 1826 : parts_is_snan_frac(p->frac_hi, s) 1827 ? float_class_snan : float_class_qnan); 1828 } 1829 return true; 1830 } 1831 1832 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p, 1833 float_status *s) 1834 { 1835 const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision]; 1836 uint64_t frac; 1837 int exp; 1838 1839 switch (p->cls) { 1840 case float_class_normal: 1841 case float_class_denormal: 1842 if (s->floatx80_rounding_precision == floatx80_precision_x) { 1843 parts_uncanon_normal(p, s, fmt); 1844 frac = p->frac_hi; 1845 exp = p->exp; 1846 } else { 1847 FloatParts64 p64; 1848 1849 p64.sign = p->sign; 1850 p64.exp = p->exp; 1851 frac_truncjam(&p64, p); 1852 parts_uncanon_normal(&p64, s, fmt); 1853 frac = p64.frac; 1854 exp = p64.exp; 1855 } 1856 if (exp != fmt->exp_max) { 1857 break; 1858 } 1859 /* rounded to inf -- fall through to set frac correctly */ 1860 1861 case float_class_inf: 1862 /* x86 and m68k differ in the setting of the integer bit. */ 1863 frac = floatx80_infinity_low; 1864 exp = fmt->exp_max; 1865 break; 1866 1867 case float_class_zero: 1868 frac = 0; 1869 exp = 0; 1870 break; 1871 1872 case float_class_snan: 1873 case float_class_qnan: 1874 /* NaNs have the integer bit set. */ 1875 frac = p->frac_hi | (1ull << 63); 1876 exp = fmt->exp_max; 1877 break; 1878 1879 default: 1880 g_assert_not_reached(); 1881 } 1882 1883 return packFloatx80(p->sign, exp, frac); 1884 } 1885 1886 /* 1887 * Addition and subtraction 1888 */ 1889 1890 static float16 QEMU_FLATTEN 1891 float16_addsub(float16 a, float16 b, float_status *status, bool subtract) 1892 { 1893 FloatParts64 pa, pb, *pr; 1894 1895 float16_unpack_canonical(&pa, a, status); 1896 float16_unpack_canonical(&pb, b, status); 1897 pr = parts_addsub(&pa, &pb, status, subtract); 1898 1899 return float16_round_pack_canonical(pr, status); 1900 } 1901 1902 float16 float16_add(float16 a, float16 b, float_status *status) 1903 { 1904 return float16_addsub(a, b, status, false); 1905 } 1906 1907 float16 float16_sub(float16 a, float16 b, float_status *status) 1908 { 1909 return float16_addsub(a, b, status, true); 1910 } 1911 1912 static float32 QEMU_SOFTFLOAT_ATTR 1913 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract) 1914 { 1915 FloatParts64 pa, pb, *pr; 1916 1917 float32_unpack_canonical(&pa, a, status); 1918 float32_unpack_canonical(&pb, b, status); 1919 pr = parts_addsub(&pa, &pb, status, subtract); 1920 1921 return float32_round_pack_canonical(pr, status); 1922 } 1923 1924 static float32 soft_f32_add(float32 a, float32 b, float_status *status) 1925 { 1926 return soft_f32_addsub(a, b, status, false); 1927 } 1928 1929 static float32 soft_f32_sub(float32 a, float32 b, float_status *status) 1930 { 1931 return soft_f32_addsub(a, b, status, true); 1932 } 1933 1934 static float64 QEMU_SOFTFLOAT_ATTR 1935 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract) 1936 { 1937 FloatParts64 pa, pb, *pr; 1938 1939 float64_unpack_canonical(&pa, a, status); 1940 float64_unpack_canonical(&pb, b, status); 1941 pr = parts_addsub(&pa, &pb, status, subtract); 1942 1943 return float64_round_pack_canonical(pr, status); 1944 } 1945 1946 static float64 soft_f64_add(float64 a, float64 b, float_status *status) 1947 { 1948 return soft_f64_addsub(a, b, status, false); 1949 } 1950 1951 static float64 soft_f64_sub(float64 a, float64 b, float_status *status) 1952 { 1953 return soft_f64_addsub(a, b, status, true); 1954 } 1955 1956 static float hard_f32_add(float a, float b) 1957 { 1958 return a + b; 1959 } 1960 1961 static float hard_f32_sub(float a, float b) 1962 { 1963 return a - b; 1964 } 1965 1966 static double hard_f64_add(double a, double b) 1967 { 1968 return a + b; 1969 } 1970 1971 static double hard_f64_sub(double a, double b) 1972 { 1973 return a - b; 1974 } 1975 1976 static bool f32_addsubmul_post(union_float32 a, union_float32 b) 1977 { 1978 if (QEMU_HARDFLOAT_2F32_USE_FP) { 1979 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO); 1980 } 1981 return !(float32_is_zero(a.s) && float32_is_zero(b.s)); 1982 } 1983 1984 static bool f64_addsubmul_post(union_float64 a, union_float64 b) 1985 { 1986 if (QEMU_HARDFLOAT_2F64_USE_FP) { 1987 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO); 1988 } else { 1989 return !(float64_is_zero(a.s) && float64_is_zero(b.s)); 1990 } 1991 } 1992 1993 static float32 float32_addsub(float32 a, float32 b, float_status *s, 1994 hard_f32_op2_fn hard, soft_f32_op2_fn soft) 1995 { 1996 return float32_gen2(a, b, s, hard, soft, 1997 f32_is_zon2, f32_addsubmul_post); 1998 } 1999 2000 static float64 float64_addsub(float64 a, float64 b, float_status *s, 2001 hard_f64_op2_fn hard, soft_f64_op2_fn soft) 2002 { 2003 return float64_gen2(a, b, s, hard, soft, 2004 f64_is_zon2, f64_addsubmul_post); 2005 } 2006 2007 float32 QEMU_FLATTEN 2008 float32_add(float32 a, float32 b, float_status *s) 2009 { 2010 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add); 2011 } 2012 2013 float32 QEMU_FLATTEN 2014 float32_sub(float32 a, float32 b, float_status *s) 2015 { 2016 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub); 2017 } 2018 2019 float64 QEMU_FLATTEN 2020 float64_add(float64 a, float64 b, float_status *s) 2021 { 2022 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add); 2023 } 2024 2025 float64 QEMU_FLATTEN 2026 float64_sub(float64 a, float64 b, float_status *s) 2027 { 2028 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub); 2029 } 2030 2031 static float64 float64r32_addsub(float64 a, float64 b, float_status *status, 2032 bool subtract) 2033 { 2034 FloatParts64 pa, pb, *pr; 2035 2036 float64_unpack_canonical(&pa, a, status); 2037 float64_unpack_canonical(&pb, b, status); 2038 pr = parts_addsub(&pa, &pb, status, subtract); 2039 2040 return float64r32_round_pack_canonical(pr, status); 2041 } 2042 2043 float64 float64r32_add(float64 a, float64 b, float_status *status) 2044 { 2045 return float64r32_addsub(a, b, status, false); 2046 } 2047 2048 float64 float64r32_sub(float64 a, float64 b, float_status *status) 2049 { 2050 return float64r32_addsub(a, b, status, true); 2051 } 2052 2053 static bfloat16 QEMU_FLATTEN 2054 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract) 2055 { 2056 FloatParts64 pa, pb, *pr; 2057 2058 bfloat16_unpack_canonical(&pa, a, status); 2059 bfloat16_unpack_canonical(&pb, b, status); 2060 pr = parts_addsub(&pa, &pb, status, subtract); 2061 2062 return bfloat16_round_pack_canonical(pr, status); 2063 } 2064 2065 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status) 2066 { 2067 return bfloat16_addsub(a, b, status, false); 2068 } 2069 2070 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status) 2071 { 2072 return bfloat16_addsub(a, b, status, true); 2073 } 2074 2075 static float128 QEMU_FLATTEN 2076 float128_addsub(float128 a, float128 b, float_status *status, bool subtract) 2077 { 2078 FloatParts128 pa, pb, *pr; 2079 2080 float128_unpack_canonical(&pa, a, status); 2081 float128_unpack_canonical(&pb, b, status); 2082 pr = parts_addsub(&pa, &pb, status, subtract); 2083 2084 return float128_round_pack_canonical(pr, status); 2085 } 2086 2087 float128 float128_add(float128 a, float128 b, float_status *status) 2088 { 2089 return float128_addsub(a, b, status, false); 2090 } 2091 2092 float128 float128_sub(float128 a, float128 b, float_status *status) 2093 { 2094 return float128_addsub(a, b, status, true); 2095 } 2096 2097 static floatx80 QEMU_FLATTEN 2098 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract) 2099 { 2100 FloatParts128 pa, pb, *pr; 2101 2102 if (!floatx80_unpack_canonical(&pa, a, status) || 2103 !floatx80_unpack_canonical(&pb, b, status)) { 2104 return floatx80_default_nan(status); 2105 } 2106 2107 pr = parts_addsub(&pa, &pb, status, subtract); 2108 return floatx80_round_pack_canonical(pr, status); 2109 } 2110 2111 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) 2112 { 2113 return floatx80_addsub(a, b, status, false); 2114 } 2115 2116 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) 2117 { 2118 return floatx80_addsub(a, b, status, true); 2119 } 2120 2121 /* 2122 * Multiplication 2123 */ 2124 2125 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status) 2126 { 2127 FloatParts64 pa, pb, *pr; 2128 2129 float16_unpack_canonical(&pa, a, status); 2130 float16_unpack_canonical(&pb, b, status); 2131 pr = parts_mul(&pa, &pb, status); 2132 2133 return float16_round_pack_canonical(pr, status); 2134 } 2135 2136 static float32 QEMU_SOFTFLOAT_ATTR 2137 soft_f32_mul(float32 a, float32 b, float_status *status) 2138 { 2139 FloatParts64 pa, pb, *pr; 2140 2141 float32_unpack_canonical(&pa, a, status); 2142 float32_unpack_canonical(&pb, b, status); 2143 pr = parts_mul(&pa, &pb, status); 2144 2145 return float32_round_pack_canonical(pr, status); 2146 } 2147 2148 static float64 QEMU_SOFTFLOAT_ATTR 2149 soft_f64_mul(float64 a, float64 b, float_status *status) 2150 { 2151 FloatParts64 pa, pb, *pr; 2152 2153 float64_unpack_canonical(&pa, a, status); 2154 float64_unpack_canonical(&pb, b, status); 2155 pr = parts_mul(&pa, &pb, status); 2156 2157 return float64_round_pack_canonical(pr, status); 2158 } 2159 2160 static float hard_f32_mul(float a, float b) 2161 { 2162 return a * b; 2163 } 2164 2165 static double hard_f64_mul(double a, double b) 2166 { 2167 return a * b; 2168 } 2169 2170 float32 QEMU_FLATTEN 2171 float32_mul(float32 a, float32 b, float_status *s) 2172 { 2173 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul, 2174 f32_is_zon2, f32_addsubmul_post); 2175 } 2176 2177 float64 QEMU_FLATTEN 2178 float64_mul(float64 a, float64 b, float_status *s) 2179 { 2180 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul, 2181 f64_is_zon2, f64_addsubmul_post); 2182 } 2183 2184 float64 float64r32_mul(float64 a, float64 b, float_status *status) 2185 { 2186 FloatParts64 pa, pb, *pr; 2187 2188 float64_unpack_canonical(&pa, a, status); 2189 float64_unpack_canonical(&pb, b, status); 2190 pr = parts_mul(&pa, &pb, status); 2191 2192 return float64r32_round_pack_canonical(pr, status); 2193 } 2194 2195 bfloat16 QEMU_FLATTEN 2196 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status) 2197 { 2198 FloatParts64 pa, pb, *pr; 2199 2200 bfloat16_unpack_canonical(&pa, a, status); 2201 bfloat16_unpack_canonical(&pb, b, status); 2202 pr = parts_mul(&pa, &pb, status); 2203 2204 return bfloat16_round_pack_canonical(pr, status); 2205 } 2206 2207 float128 QEMU_FLATTEN 2208 float128_mul(float128 a, float128 b, float_status *status) 2209 { 2210 FloatParts128 pa, pb, *pr; 2211 2212 float128_unpack_canonical(&pa, a, status); 2213 float128_unpack_canonical(&pb, b, status); 2214 pr = parts_mul(&pa, &pb, status); 2215 2216 return float128_round_pack_canonical(pr, status); 2217 } 2218 2219 floatx80 QEMU_FLATTEN 2220 floatx80_mul(floatx80 a, floatx80 b, float_status *status) 2221 { 2222 FloatParts128 pa, pb, *pr; 2223 2224 if (!floatx80_unpack_canonical(&pa, a, status) || 2225 !floatx80_unpack_canonical(&pb, b, status)) { 2226 return floatx80_default_nan(status); 2227 } 2228 2229 pr = parts_mul(&pa, &pb, status); 2230 return floatx80_round_pack_canonical(pr, status); 2231 } 2232 2233 /* 2234 * Fused multiply-add 2235 */ 2236 2237 float16 QEMU_FLATTEN 2238 float16_muladd_scalbn(float16 a, float16 b, float16 c, 2239 int scale, int flags, float_status *status) 2240 { 2241 FloatParts64 pa, pb, pc, *pr; 2242 2243 float16_unpack_canonical(&pa, a, status); 2244 float16_unpack_canonical(&pb, b, status); 2245 float16_unpack_canonical(&pc, c, status); 2246 pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status); 2247 2248 return float16_round_pack_canonical(pr, status); 2249 } 2250 2251 float16 float16_muladd(float16 a, float16 b, float16 c, 2252 int flags, float_status *status) 2253 { 2254 return float16_muladd_scalbn(a, b, c, 0, flags, status); 2255 } 2256 2257 float32 QEMU_SOFTFLOAT_ATTR 2258 float32_muladd_scalbn(float32 a, float32 b, float32 c, 2259 int scale, int flags, float_status *status) 2260 { 2261 FloatParts64 pa, pb, pc, *pr; 2262 2263 float32_unpack_canonical(&pa, a, status); 2264 float32_unpack_canonical(&pb, b, status); 2265 float32_unpack_canonical(&pc, c, status); 2266 pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status); 2267 2268 return float32_round_pack_canonical(pr, status); 2269 } 2270 2271 float64 QEMU_SOFTFLOAT_ATTR 2272 float64_muladd_scalbn(float64 a, float64 b, float64 c, 2273 int scale, int flags, float_status *status) 2274 { 2275 FloatParts64 pa, pb, pc, *pr; 2276 2277 float64_unpack_canonical(&pa, a, status); 2278 float64_unpack_canonical(&pb, b, status); 2279 float64_unpack_canonical(&pc, c, status); 2280 pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status); 2281 2282 return float64_round_pack_canonical(pr, status); 2283 } 2284 2285 static bool force_soft_fma; 2286 2287 float32 QEMU_FLATTEN 2288 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s) 2289 { 2290 union_float32 ua, ub, uc, ur; 2291 2292 ua.s = xa; 2293 ub.s = xb; 2294 uc.s = xc; 2295 2296 if (unlikely(!can_use_fpu(s))) { 2297 goto soft; 2298 } 2299 if (unlikely(flags & float_muladd_suppress_add_product_zero)) { 2300 goto soft; 2301 } 2302 2303 float32_input_flush3(&ua.s, &ub.s, &uc.s, s); 2304 if (unlikely(!f32_is_zon3(ua, ub, uc))) { 2305 goto soft; 2306 } 2307 2308 if (unlikely(force_soft_fma)) { 2309 goto soft; 2310 } 2311 2312 /* 2313 * When (a || b) == 0, there's no need to check for under/over flow, 2314 * since we know the addend is (normal || 0) and the product is 0. 2315 */ 2316 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) { 2317 union_float32 up; 2318 bool prod_sign; 2319 2320 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s); 2321 prod_sign ^= !!(flags & float_muladd_negate_product); 2322 up.s = float32_set_sign(float32_zero, prod_sign); 2323 2324 if (flags & float_muladd_negate_c) { 2325 uc.h = -uc.h; 2326 } 2327 ur.h = up.h + uc.h; 2328 } else { 2329 union_float32 ua_orig = ua; 2330 union_float32 uc_orig = uc; 2331 2332 if (flags & float_muladd_negate_product) { 2333 ua.h = -ua.h; 2334 } 2335 if (flags & float_muladd_negate_c) { 2336 uc.h = -uc.h; 2337 } 2338 2339 ur.h = fmaf(ua.h, ub.h, uc.h); 2340 2341 if (unlikely(f32_is_inf(ur))) { 2342 float_raise(float_flag_overflow, s); 2343 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) { 2344 ua = ua_orig; 2345 uc = uc_orig; 2346 goto soft; 2347 } 2348 } 2349 if (flags & float_muladd_negate_result) { 2350 return float32_chs(ur.s); 2351 } 2352 return ur.s; 2353 2354 soft: 2355 return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s); 2356 } 2357 2358 float64 QEMU_FLATTEN 2359 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s) 2360 { 2361 union_float64 ua, ub, uc, ur; 2362 2363 ua.s = xa; 2364 ub.s = xb; 2365 uc.s = xc; 2366 2367 if (unlikely(!can_use_fpu(s))) { 2368 goto soft; 2369 } 2370 2371 float64_input_flush3(&ua.s, &ub.s, &uc.s, s); 2372 if (unlikely(!f64_is_zon3(ua, ub, uc))) { 2373 goto soft; 2374 } 2375 2376 if (unlikely(force_soft_fma)) { 2377 goto soft; 2378 } 2379 2380 /* 2381 * When (a || b) == 0, there's no need to check for under/over flow, 2382 * since we know the addend is (normal || 0) and the product is 0. 2383 */ 2384 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) { 2385 union_float64 up; 2386 bool prod_sign; 2387 2388 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s); 2389 prod_sign ^= !!(flags & float_muladd_negate_product); 2390 up.s = float64_set_sign(float64_zero, prod_sign); 2391 2392 if (flags & float_muladd_negate_c) { 2393 uc.h = -uc.h; 2394 } 2395 ur.h = up.h + uc.h; 2396 } else { 2397 union_float64 ua_orig = ua; 2398 union_float64 uc_orig = uc; 2399 2400 if (flags & float_muladd_negate_product) { 2401 ua.h = -ua.h; 2402 } 2403 if (flags & float_muladd_negate_c) { 2404 uc.h = -uc.h; 2405 } 2406 2407 ur.h = fma(ua.h, ub.h, uc.h); 2408 2409 if (unlikely(f64_is_inf(ur))) { 2410 float_raise(float_flag_overflow, s); 2411 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) { 2412 ua = ua_orig; 2413 uc = uc_orig; 2414 goto soft; 2415 } 2416 } 2417 if (flags & float_muladd_negate_result) { 2418 return float64_chs(ur.s); 2419 } 2420 return ur.s; 2421 2422 soft: 2423 return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s); 2424 } 2425 2426 float64 float64r32_muladd(float64 a, float64 b, float64 c, 2427 int flags, float_status *status) 2428 { 2429 FloatParts64 pa, pb, pc, *pr; 2430 2431 float64_unpack_canonical(&pa, a, status); 2432 float64_unpack_canonical(&pb, b, status); 2433 float64_unpack_canonical(&pc, c, status); 2434 pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status); 2435 2436 return float64r32_round_pack_canonical(pr, status); 2437 } 2438 2439 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c, 2440 int flags, float_status *status) 2441 { 2442 FloatParts64 pa, pb, pc, *pr; 2443 2444 bfloat16_unpack_canonical(&pa, a, status); 2445 bfloat16_unpack_canonical(&pb, b, status); 2446 bfloat16_unpack_canonical(&pc, c, status); 2447 pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status); 2448 2449 return bfloat16_round_pack_canonical(pr, status); 2450 } 2451 2452 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c, 2453 int flags, float_status *status) 2454 { 2455 FloatParts128 pa, pb, pc, *pr; 2456 2457 float128_unpack_canonical(&pa, a, status); 2458 float128_unpack_canonical(&pb, b, status); 2459 float128_unpack_canonical(&pc, c, status); 2460 pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status); 2461 2462 return float128_round_pack_canonical(pr, status); 2463 } 2464 2465 /* 2466 * Division 2467 */ 2468 2469 float16 float16_div(float16 a, float16 b, float_status *status) 2470 { 2471 FloatParts64 pa, pb, *pr; 2472 2473 float16_unpack_canonical(&pa, a, status); 2474 float16_unpack_canonical(&pb, b, status); 2475 pr = parts_div(&pa, &pb, status); 2476 2477 return float16_round_pack_canonical(pr, status); 2478 } 2479 2480 static float32 QEMU_SOFTFLOAT_ATTR 2481 soft_f32_div(float32 a, float32 b, float_status *status) 2482 { 2483 FloatParts64 pa, pb, *pr; 2484 2485 float32_unpack_canonical(&pa, a, status); 2486 float32_unpack_canonical(&pb, b, status); 2487 pr = parts_div(&pa, &pb, status); 2488 2489 return float32_round_pack_canonical(pr, status); 2490 } 2491 2492 static float64 QEMU_SOFTFLOAT_ATTR 2493 soft_f64_div(float64 a, float64 b, float_status *status) 2494 { 2495 FloatParts64 pa, pb, *pr; 2496 2497 float64_unpack_canonical(&pa, a, status); 2498 float64_unpack_canonical(&pb, b, status); 2499 pr = parts_div(&pa, &pb, status); 2500 2501 return float64_round_pack_canonical(pr, status); 2502 } 2503 2504 static float hard_f32_div(float a, float b) 2505 { 2506 return a / b; 2507 } 2508 2509 static double hard_f64_div(double a, double b) 2510 { 2511 return a / b; 2512 } 2513 2514 static bool f32_div_pre(union_float32 a, union_float32 b) 2515 { 2516 if (QEMU_HARDFLOAT_2F32_USE_FP) { 2517 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) && 2518 fpclassify(b.h) == FP_NORMAL; 2519 } 2520 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s); 2521 } 2522 2523 static bool f64_div_pre(union_float64 a, union_float64 b) 2524 { 2525 if (QEMU_HARDFLOAT_2F64_USE_FP) { 2526 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) && 2527 fpclassify(b.h) == FP_NORMAL; 2528 } 2529 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s); 2530 } 2531 2532 static bool f32_div_post(union_float32 a, union_float32 b) 2533 { 2534 if (QEMU_HARDFLOAT_2F32_USE_FP) { 2535 return fpclassify(a.h) != FP_ZERO; 2536 } 2537 return !float32_is_zero(a.s); 2538 } 2539 2540 static bool f64_div_post(union_float64 a, union_float64 b) 2541 { 2542 if (QEMU_HARDFLOAT_2F64_USE_FP) { 2543 return fpclassify(a.h) != FP_ZERO; 2544 } 2545 return !float64_is_zero(a.s); 2546 } 2547 2548 float32 QEMU_FLATTEN 2549 float32_div(float32 a, float32 b, float_status *s) 2550 { 2551 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div, 2552 f32_div_pre, f32_div_post); 2553 } 2554 2555 float64 QEMU_FLATTEN 2556 float64_div(float64 a, float64 b, float_status *s) 2557 { 2558 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div, 2559 f64_div_pre, f64_div_post); 2560 } 2561 2562 float64 float64r32_div(float64 a, float64 b, float_status *status) 2563 { 2564 FloatParts64 pa, pb, *pr; 2565 2566 float64_unpack_canonical(&pa, a, status); 2567 float64_unpack_canonical(&pb, b, status); 2568 pr = parts_div(&pa, &pb, status); 2569 2570 return float64r32_round_pack_canonical(pr, status); 2571 } 2572 2573 bfloat16 QEMU_FLATTEN 2574 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status) 2575 { 2576 FloatParts64 pa, pb, *pr; 2577 2578 bfloat16_unpack_canonical(&pa, a, status); 2579 bfloat16_unpack_canonical(&pb, b, status); 2580 pr = parts_div(&pa, &pb, status); 2581 2582 return bfloat16_round_pack_canonical(pr, status); 2583 } 2584 2585 float128 QEMU_FLATTEN 2586 float128_div(float128 a, float128 b, float_status *status) 2587 { 2588 FloatParts128 pa, pb, *pr; 2589 2590 float128_unpack_canonical(&pa, a, status); 2591 float128_unpack_canonical(&pb, b, status); 2592 pr = parts_div(&pa, &pb, status); 2593 2594 return float128_round_pack_canonical(pr, status); 2595 } 2596 2597 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) 2598 { 2599 FloatParts128 pa, pb, *pr; 2600 2601 if (!floatx80_unpack_canonical(&pa, a, status) || 2602 !floatx80_unpack_canonical(&pb, b, status)) { 2603 return floatx80_default_nan(status); 2604 } 2605 2606 pr = parts_div(&pa, &pb, status); 2607 return floatx80_round_pack_canonical(pr, status); 2608 } 2609 2610 /* 2611 * Remainder 2612 */ 2613 2614 float32 float32_rem(float32 a, float32 b, float_status *status) 2615 { 2616 FloatParts64 pa, pb, *pr; 2617 2618 float32_unpack_canonical(&pa, a, status); 2619 float32_unpack_canonical(&pb, b, status); 2620 pr = parts_modrem(&pa, &pb, NULL, status); 2621 2622 return float32_round_pack_canonical(pr, status); 2623 } 2624 2625 float64 float64_rem(float64 a, float64 b, float_status *status) 2626 { 2627 FloatParts64 pa, pb, *pr; 2628 2629 float64_unpack_canonical(&pa, a, status); 2630 float64_unpack_canonical(&pb, b, status); 2631 pr = parts_modrem(&pa, &pb, NULL, status); 2632 2633 return float64_round_pack_canonical(pr, status); 2634 } 2635 2636 float128 float128_rem(float128 a, float128 b, float_status *status) 2637 { 2638 FloatParts128 pa, pb, *pr; 2639 2640 float128_unpack_canonical(&pa, a, status); 2641 float128_unpack_canonical(&pb, b, status); 2642 pr = parts_modrem(&pa, &pb, NULL, status); 2643 2644 return float128_round_pack_canonical(pr, status); 2645 } 2646 2647 /* 2648 * Returns the remainder of the extended double-precision floating-point value 2649 * `a' with respect to the corresponding value `b'. 2650 * If 'mod' is false, the operation is performed according to the IEC/IEEE 2651 * Standard for Binary Floating-Point Arithmetic. If 'mod' is true, return 2652 * the remainder based on truncating the quotient toward zero instead and 2653 * *quotient is set to the low 64 bits of the absolute value of the integer 2654 * quotient. 2655 */ 2656 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, 2657 uint64_t *quotient, float_status *status) 2658 { 2659 FloatParts128 pa, pb, *pr; 2660 2661 *quotient = 0; 2662 if (!floatx80_unpack_canonical(&pa, a, status) || 2663 !floatx80_unpack_canonical(&pb, b, status)) { 2664 return floatx80_default_nan(status); 2665 } 2666 pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status); 2667 2668 return floatx80_round_pack_canonical(pr, status); 2669 } 2670 2671 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) 2672 { 2673 uint64_t quotient; 2674 return floatx80_modrem(a, b, false, "ient, status); 2675 } 2676 2677 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status) 2678 { 2679 uint64_t quotient; 2680 return floatx80_modrem(a, b, true, "ient, status); 2681 } 2682 2683 /* 2684 * Float to Float conversions 2685 * 2686 * Returns the result of converting one float format to another. The 2687 * conversion is performed according to the IEC/IEEE Standard for 2688 * Binary Floating-Point Arithmetic. 2689 * 2690 * Usually this only needs to take care of raising invalid exceptions 2691 * and handling the conversion on NaNs. 2692 */ 2693 2694 static void parts_float_to_ahp(FloatParts64 *a, float_status *s) 2695 { 2696 switch (a->cls) { 2697 case float_class_snan: 2698 float_raise(float_flag_invalid_snan, s); 2699 /* fall through */ 2700 case float_class_qnan: 2701 /* 2702 * There is no NaN in the destination format. Raise Invalid 2703 * and return a zero with the sign of the input NaN. 2704 */ 2705 float_raise(float_flag_invalid, s); 2706 a->cls = float_class_zero; 2707 break; 2708 2709 case float_class_inf: 2710 /* 2711 * There is no Inf in the destination format. Raise Invalid 2712 * and return the maximum normal with the correct sign. 2713 */ 2714 float_raise(float_flag_invalid, s); 2715 a->cls = float_class_normal; 2716 a->exp = float16_params_ahp.exp_max; 2717 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift, 2718 float16_params_ahp.frac_size + 1); 2719 break; 2720 2721 case float_class_normal: 2722 case float_class_denormal: 2723 case float_class_zero: 2724 break; 2725 2726 default: 2727 g_assert_not_reached(); 2728 } 2729 } 2730 2731 static void parts64_float_to_float(FloatParts64 *a, float_status *s) 2732 { 2733 if (is_nan(a->cls)) { 2734 parts_return_nan(a, s); 2735 } 2736 } 2737 2738 static void parts128_float_to_float(FloatParts128 *a, float_status *s) 2739 { 2740 if (is_nan(a->cls)) { 2741 parts_return_nan(a, s); 2742 } 2743 } 2744 2745 #define parts_float_to_float(P, S) \ 2746 PARTS_GENERIC_64_128(float_to_float, P)(P, S) 2747 2748 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, 2749 float_status *s) 2750 { 2751 a->cls = b->cls; 2752 a->sign = b->sign; 2753 a->exp = b->exp; 2754 2755 if (is_anynorm(a->cls)) { 2756 frac_truncjam(a, b); 2757 } else if (is_nan(a->cls)) { 2758 /* Discard the low bits of the NaN. */ 2759 a->frac = b->frac_hi; 2760 parts_return_nan(a, s); 2761 } 2762 } 2763 2764 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b, 2765 float_status *s) 2766 { 2767 a->cls = b->cls; 2768 a->sign = b->sign; 2769 a->exp = b->exp; 2770 frac_widen(a, b); 2771 2772 if (is_nan(a->cls)) { 2773 parts_return_nan(a, s); 2774 } 2775 } 2776 2777 float32 float16_to_float32(float16 a, bool ieee, float_status *s) 2778 { 2779 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; 2780 FloatParts64 p; 2781 2782 float16a_unpack_canonical(&p, a, s, fmt16); 2783 parts_float_to_float(&p, s); 2784 return float32_round_pack_canonical(&p, s); 2785 } 2786 2787 float64 float16_to_float64(float16 a, bool ieee, float_status *s) 2788 { 2789 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; 2790 FloatParts64 p; 2791 2792 float16a_unpack_canonical(&p, a, s, fmt16); 2793 parts_float_to_float(&p, s); 2794 return float64_round_pack_canonical(&p, s); 2795 } 2796 2797 float16 float32_to_float16(float32 a, bool ieee, float_status *s) 2798 { 2799 FloatParts64 p; 2800 const FloatFmt *fmt; 2801 2802 float32_unpack_canonical(&p, a, s); 2803 if (ieee) { 2804 parts_float_to_float(&p, s); 2805 fmt = &float16_params; 2806 } else { 2807 parts_float_to_ahp(&p, s); 2808 fmt = &float16_params_ahp; 2809 } 2810 return float16a_round_pack_canonical(&p, s, fmt); 2811 } 2812 2813 static float64 QEMU_SOFTFLOAT_ATTR 2814 soft_float32_to_float64(float32 a, float_status *s) 2815 { 2816 FloatParts64 p; 2817 2818 float32_unpack_canonical(&p, a, s); 2819 parts_float_to_float(&p, s); 2820 return float64_round_pack_canonical(&p, s); 2821 } 2822 2823 float64 float32_to_float64(float32 a, float_status *s) 2824 { 2825 if (likely(float32_is_normal(a))) { 2826 /* Widening conversion can never produce inexact results. */ 2827 union_float32 uf; 2828 union_float64 ud; 2829 uf.s = a; 2830 ud.h = uf.h; 2831 return ud.s; 2832 } else if (float32_is_zero(a)) { 2833 return float64_set_sign(float64_zero, float32_is_neg(a)); 2834 } else { 2835 return soft_float32_to_float64(a, s); 2836 } 2837 } 2838 2839 float16 float64_to_float16(float64 a, bool ieee, float_status *s) 2840 { 2841 FloatParts64 p; 2842 const FloatFmt *fmt; 2843 2844 float64_unpack_canonical(&p, a, s); 2845 if (ieee) { 2846 parts_float_to_float(&p, s); 2847 fmt = &float16_params; 2848 } else { 2849 parts_float_to_ahp(&p, s); 2850 fmt = &float16_params_ahp; 2851 } 2852 return float16a_round_pack_canonical(&p, s, fmt); 2853 } 2854 2855 float32 float64_to_float32(float64 a, float_status *s) 2856 { 2857 FloatParts64 p; 2858 2859 float64_unpack_canonical(&p, a, s); 2860 parts_float_to_float(&p, s); 2861 return float32_round_pack_canonical(&p, s); 2862 } 2863 2864 float32 bfloat16_to_float32(bfloat16 a, float_status *s) 2865 { 2866 FloatParts64 p; 2867 2868 bfloat16_unpack_canonical(&p, a, s); 2869 parts_float_to_float(&p, s); 2870 return float32_round_pack_canonical(&p, s); 2871 } 2872 2873 float64 bfloat16_to_float64(bfloat16 a, float_status *s) 2874 { 2875 FloatParts64 p; 2876 2877 bfloat16_unpack_canonical(&p, a, s); 2878 parts_float_to_float(&p, s); 2879 return float64_round_pack_canonical(&p, s); 2880 } 2881 2882 bfloat16 float32_to_bfloat16(float32 a, float_status *s) 2883 { 2884 FloatParts64 p; 2885 2886 float32_unpack_canonical(&p, a, s); 2887 parts_float_to_float(&p, s); 2888 return bfloat16_round_pack_canonical(&p, s); 2889 } 2890 2891 bfloat16 float64_to_bfloat16(float64 a, float_status *s) 2892 { 2893 FloatParts64 p; 2894 2895 float64_unpack_canonical(&p, a, s); 2896 parts_float_to_float(&p, s); 2897 return bfloat16_round_pack_canonical(&p, s); 2898 } 2899 2900 float32 float128_to_float32(float128 a, float_status *s) 2901 { 2902 FloatParts64 p64; 2903 FloatParts128 p128; 2904 2905 float128_unpack_canonical(&p128, a, s); 2906 parts_float_to_float_narrow(&p64, &p128, s); 2907 return float32_round_pack_canonical(&p64, s); 2908 } 2909 2910 float64 float128_to_float64(float128 a, float_status *s) 2911 { 2912 FloatParts64 p64; 2913 FloatParts128 p128; 2914 2915 float128_unpack_canonical(&p128, a, s); 2916 parts_float_to_float_narrow(&p64, &p128, s); 2917 return float64_round_pack_canonical(&p64, s); 2918 } 2919 2920 float128 float32_to_float128(float32 a, float_status *s) 2921 { 2922 FloatParts64 p64; 2923 FloatParts128 p128; 2924 2925 float32_unpack_canonical(&p64, a, s); 2926 parts_float_to_float_widen(&p128, &p64, s); 2927 return float128_round_pack_canonical(&p128, s); 2928 } 2929 2930 float128 float64_to_float128(float64 a, float_status *s) 2931 { 2932 FloatParts64 p64; 2933 FloatParts128 p128; 2934 2935 float64_unpack_canonical(&p64, a, s); 2936 parts_float_to_float_widen(&p128, &p64, s); 2937 return float128_round_pack_canonical(&p128, s); 2938 } 2939 2940 float32 floatx80_to_float32(floatx80 a, float_status *s) 2941 { 2942 FloatParts64 p64; 2943 FloatParts128 p128; 2944 2945 if (floatx80_unpack_canonical(&p128, a, s)) { 2946 parts_float_to_float_narrow(&p64, &p128, s); 2947 } else { 2948 parts_default_nan(&p64, s); 2949 } 2950 return float32_round_pack_canonical(&p64, s); 2951 } 2952 2953 float64 floatx80_to_float64(floatx80 a, float_status *s) 2954 { 2955 FloatParts64 p64; 2956 FloatParts128 p128; 2957 2958 if (floatx80_unpack_canonical(&p128, a, s)) { 2959 parts_float_to_float_narrow(&p64, &p128, s); 2960 } else { 2961 parts_default_nan(&p64, s); 2962 } 2963 return float64_round_pack_canonical(&p64, s); 2964 } 2965 2966 float128 floatx80_to_float128(floatx80 a, float_status *s) 2967 { 2968 FloatParts128 p; 2969 2970 if (floatx80_unpack_canonical(&p, a, s)) { 2971 parts_float_to_float(&p, s); 2972 } else { 2973 parts_default_nan(&p, s); 2974 } 2975 return float128_round_pack_canonical(&p, s); 2976 } 2977 2978 floatx80 float32_to_floatx80(float32 a, float_status *s) 2979 { 2980 FloatParts64 p64; 2981 FloatParts128 p128; 2982 2983 float32_unpack_canonical(&p64, a, s); 2984 parts_float_to_float_widen(&p128, &p64, s); 2985 return floatx80_round_pack_canonical(&p128, s); 2986 } 2987 2988 floatx80 float64_to_floatx80(float64 a, float_status *s) 2989 { 2990 FloatParts64 p64; 2991 FloatParts128 p128; 2992 2993 float64_unpack_canonical(&p64, a, s); 2994 parts_float_to_float_widen(&p128, &p64, s); 2995 return floatx80_round_pack_canonical(&p128, s); 2996 } 2997 2998 floatx80 float128_to_floatx80(float128 a, float_status *s) 2999 { 3000 FloatParts128 p; 3001 3002 float128_unpack_canonical(&p, a, s); 3003 parts_float_to_float(&p, s); 3004 return floatx80_round_pack_canonical(&p, s); 3005 } 3006 3007 /* 3008 * Round to integral value 3009 */ 3010 3011 float16 float16_round_to_int(float16 a, float_status *s) 3012 { 3013 FloatParts64 p; 3014 3015 float16_unpack_canonical(&p, a, s); 3016 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params); 3017 return float16_round_pack_canonical(&p, s); 3018 } 3019 3020 float32 float32_round_to_int(float32 a, float_status *s) 3021 { 3022 FloatParts64 p; 3023 3024 float32_unpack_canonical(&p, a, s); 3025 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params); 3026 return float32_round_pack_canonical(&p, s); 3027 } 3028 3029 float64 float64_round_to_int(float64 a, float_status *s) 3030 { 3031 FloatParts64 p; 3032 3033 float64_unpack_canonical(&p, a, s); 3034 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params); 3035 return float64_round_pack_canonical(&p, s); 3036 } 3037 3038 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s) 3039 { 3040 FloatParts64 p; 3041 3042 bfloat16_unpack_canonical(&p, a, s); 3043 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params); 3044 return bfloat16_round_pack_canonical(&p, s); 3045 } 3046 3047 float128 float128_round_to_int(float128 a, float_status *s) 3048 { 3049 FloatParts128 p; 3050 3051 float128_unpack_canonical(&p, a, s); 3052 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params); 3053 return float128_round_pack_canonical(&p, s); 3054 } 3055 3056 floatx80 floatx80_round_to_int(floatx80 a, float_status *status) 3057 { 3058 FloatParts128 p; 3059 3060 if (!floatx80_unpack_canonical(&p, a, status)) { 3061 return floatx80_default_nan(status); 3062 } 3063 3064 parts_round_to_int(&p, status->float_rounding_mode, 0, status, 3065 &floatx80_params[status->floatx80_rounding_precision]); 3066 return floatx80_round_pack_canonical(&p, status); 3067 } 3068 3069 /* 3070 * Floating-point to signed integer conversions 3071 */ 3072 3073 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale, 3074 float_status *s) 3075 { 3076 FloatParts64 p; 3077 3078 float16_unpack_canonical(&p, a, s); 3079 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s); 3080 } 3081 3082 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale, 3083 float_status *s) 3084 { 3085 FloatParts64 p; 3086 3087 float16_unpack_canonical(&p, a, s); 3088 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s); 3089 } 3090 3091 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale, 3092 float_status *s) 3093 { 3094 FloatParts64 p; 3095 3096 float16_unpack_canonical(&p, a, s); 3097 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); 3098 } 3099 3100 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale, 3101 float_status *s) 3102 { 3103 FloatParts64 p; 3104 3105 float16_unpack_canonical(&p, a, s); 3106 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); 3107 } 3108 3109 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale, 3110 float_status *s) 3111 { 3112 FloatParts64 p; 3113 3114 float32_unpack_canonical(&p, a, s); 3115 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s); 3116 } 3117 3118 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale, 3119 float_status *s) 3120 { 3121 FloatParts64 p; 3122 3123 float32_unpack_canonical(&p, a, s); 3124 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); 3125 } 3126 3127 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale, 3128 float_status *s) 3129 { 3130 FloatParts64 p; 3131 3132 float32_unpack_canonical(&p, a, s); 3133 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); 3134 } 3135 3136 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale, 3137 float_status *s) 3138 { 3139 FloatParts64 p; 3140 3141 float64_unpack_canonical(&p, a, s); 3142 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s); 3143 } 3144 3145 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale, 3146 float_status *s) 3147 { 3148 FloatParts64 p; 3149 3150 float64_unpack_canonical(&p, a, s); 3151 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); 3152 } 3153 3154 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale, 3155 float_status *s) 3156 { 3157 FloatParts64 p; 3158 3159 float64_unpack_canonical(&p, a, s); 3160 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); 3161 } 3162 3163 int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, 3164 float_status *s) 3165 { 3166 FloatParts64 p; 3167 3168 bfloat16_unpack_canonical(&p, a, s); 3169 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s); 3170 } 3171 3172 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, 3173 float_status *s) 3174 { 3175 FloatParts64 p; 3176 3177 bfloat16_unpack_canonical(&p, a, s); 3178 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s); 3179 } 3180 3181 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, 3182 float_status *s) 3183 { 3184 FloatParts64 p; 3185 3186 bfloat16_unpack_canonical(&p, a, s); 3187 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); 3188 } 3189 3190 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, 3191 float_status *s) 3192 { 3193 FloatParts64 p; 3194 3195 bfloat16_unpack_canonical(&p, a, s); 3196 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); 3197 } 3198 3199 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode, 3200 int scale, float_status *s) 3201 { 3202 FloatParts128 p; 3203 3204 float128_unpack_canonical(&p, a, s); 3205 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); 3206 } 3207 3208 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode, 3209 int scale, float_status *s) 3210 { 3211 FloatParts128 p; 3212 3213 float128_unpack_canonical(&p, a, s); 3214 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); 3215 } 3216 3217 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode, 3218 int scale, float_status *s) 3219 { 3220 int flags = 0; 3221 Int128 r; 3222 FloatParts128 p; 3223 3224 float128_unpack_canonical(&p, a, s); 3225 3226 switch (p.cls) { 3227 case float_class_snan: 3228 flags |= float_flag_invalid_snan; 3229 /* fall through */ 3230 case float_class_qnan: 3231 flags |= float_flag_invalid; 3232 r = UINT128_MAX; 3233 break; 3234 3235 case float_class_inf: 3236 flags = float_flag_invalid | float_flag_invalid_cvti; 3237 r = p.sign ? INT128_MIN : INT128_MAX; 3238 break; 3239 3240 case float_class_zero: 3241 return int128_zero(); 3242 3243 case float_class_normal: 3244 case float_class_denormal: 3245 if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) { 3246 flags = float_flag_inexact; 3247 } 3248 3249 if (p.exp < 127) { 3250 int shift = 127 - p.exp; 3251 r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift); 3252 if (p.sign) { 3253 r = int128_neg(r); 3254 } 3255 } else if (p.exp == 127 && p.sign && p.frac_lo == 0 && 3256 p.frac_hi == DECOMPOSED_IMPLICIT_BIT) { 3257 r = INT128_MIN; 3258 } else { 3259 flags = float_flag_invalid | float_flag_invalid_cvti; 3260 r = p.sign ? INT128_MIN : INT128_MAX; 3261 } 3262 break; 3263 3264 default: 3265 g_assert_not_reached(); 3266 } 3267 3268 float_raise(flags, s); 3269 return r; 3270 } 3271 3272 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode, 3273 int scale, float_status *s) 3274 { 3275 FloatParts128 p; 3276 3277 if (!floatx80_unpack_canonical(&p, a, s)) { 3278 parts_default_nan(&p, s); 3279 } 3280 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); 3281 } 3282 3283 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode, 3284 int scale, float_status *s) 3285 { 3286 FloatParts128 p; 3287 3288 if (!floatx80_unpack_canonical(&p, a, s)) { 3289 parts_default_nan(&p, s); 3290 } 3291 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); 3292 } 3293 3294 int8_t float16_to_int8(float16 a, float_status *s) 3295 { 3296 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s); 3297 } 3298 3299 int16_t float16_to_int16(float16 a, float_status *s) 3300 { 3301 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s); 3302 } 3303 3304 int32_t float16_to_int32(float16 a, float_status *s) 3305 { 3306 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s); 3307 } 3308 3309 int64_t float16_to_int64(float16 a, float_status *s) 3310 { 3311 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s); 3312 } 3313 3314 int16_t float32_to_int16(float32 a, float_status *s) 3315 { 3316 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s); 3317 } 3318 3319 int32_t float32_to_int32(float32 a, float_status *s) 3320 { 3321 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s); 3322 } 3323 3324 int64_t float32_to_int64(float32 a, float_status *s) 3325 { 3326 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s); 3327 } 3328 3329 int16_t float64_to_int16(float64 a, float_status *s) 3330 { 3331 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s); 3332 } 3333 3334 int32_t float64_to_int32(float64 a, float_status *s) 3335 { 3336 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s); 3337 } 3338 3339 int64_t float64_to_int64(float64 a, float_status *s) 3340 { 3341 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s); 3342 } 3343 3344 int32_t float128_to_int32(float128 a, float_status *s) 3345 { 3346 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s); 3347 } 3348 3349 int64_t float128_to_int64(float128 a, float_status *s) 3350 { 3351 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s); 3352 } 3353 3354 Int128 float128_to_int128(float128 a, float_status *s) 3355 { 3356 return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s); 3357 } 3358 3359 int32_t floatx80_to_int32(floatx80 a, float_status *s) 3360 { 3361 return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s); 3362 } 3363 3364 int64_t floatx80_to_int64(floatx80 a, float_status *s) 3365 { 3366 return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s); 3367 } 3368 3369 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s) 3370 { 3371 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s); 3372 } 3373 3374 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s) 3375 { 3376 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s); 3377 } 3378 3379 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s) 3380 { 3381 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s); 3382 } 3383 3384 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s) 3385 { 3386 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s); 3387 } 3388 3389 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s) 3390 { 3391 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s); 3392 } 3393 3394 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s) 3395 { 3396 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s); 3397 } 3398 3399 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s) 3400 { 3401 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s); 3402 } 3403 3404 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s) 3405 { 3406 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s); 3407 } 3408 3409 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s) 3410 { 3411 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s); 3412 } 3413 3414 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s) 3415 { 3416 return float128_to_int32_scalbn(a, float_round_to_zero, 0, s); 3417 } 3418 3419 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s) 3420 { 3421 return float128_to_int64_scalbn(a, float_round_to_zero, 0, s); 3422 } 3423 3424 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s) 3425 { 3426 return float128_to_int128_scalbn(a, float_round_to_zero, 0, s); 3427 } 3428 3429 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s) 3430 { 3431 return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s); 3432 } 3433 3434 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s) 3435 { 3436 return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s); 3437 } 3438 3439 int8_t bfloat16_to_int8(bfloat16 a, float_status *s) 3440 { 3441 return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s); 3442 } 3443 3444 int16_t bfloat16_to_int16(bfloat16 a, float_status *s) 3445 { 3446 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s); 3447 } 3448 3449 int32_t bfloat16_to_int32(bfloat16 a, float_status *s) 3450 { 3451 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s); 3452 } 3453 3454 int64_t bfloat16_to_int64(bfloat16 a, float_status *s) 3455 { 3456 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s); 3457 } 3458 3459 int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s) 3460 { 3461 return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s); 3462 } 3463 3464 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s) 3465 { 3466 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s); 3467 } 3468 3469 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s) 3470 { 3471 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s); 3472 } 3473 3474 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s) 3475 { 3476 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s); 3477 } 3478 3479 int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode, 3480 float_status *s) 3481 { 3482 FloatParts64 p; 3483 3484 float64_unpack_canonical(&p, a, s); 3485 return parts_float_to_sint_modulo(&p, rmode, 31, s); 3486 } 3487 3488 int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode, 3489 float_status *s) 3490 { 3491 FloatParts64 p; 3492 3493 float64_unpack_canonical(&p, a, s); 3494 return parts_float_to_sint_modulo(&p, rmode, 63, s); 3495 } 3496 3497 /* 3498 * Floating-point to unsigned integer conversions 3499 */ 3500 3501 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale, 3502 float_status *s) 3503 { 3504 FloatParts64 p; 3505 3506 float16_unpack_canonical(&p, a, s); 3507 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s); 3508 } 3509 3510 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale, 3511 float_status *s) 3512 { 3513 FloatParts64 p; 3514 3515 float16_unpack_canonical(&p, a, s); 3516 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s); 3517 } 3518 3519 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale, 3520 float_status *s) 3521 { 3522 FloatParts64 p; 3523 3524 float16_unpack_canonical(&p, a, s); 3525 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); 3526 } 3527 3528 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale, 3529 float_status *s) 3530 { 3531 FloatParts64 p; 3532 3533 float16_unpack_canonical(&p, a, s); 3534 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); 3535 } 3536 3537 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale, 3538 float_status *s) 3539 { 3540 FloatParts64 p; 3541 3542 float32_unpack_canonical(&p, a, s); 3543 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s); 3544 } 3545 3546 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale, 3547 float_status *s) 3548 { 3549 FloatParts64 p; 3550 3551 float32_unpack_canonical(&p, a, s); 3552 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); 3553 } 3554 3555 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale, 3556 float_status *s) 3557 { 3558 FloatParts64 p; 3559 3560 float32_unpack_canonical(&p, a, s); 3561 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); 3562 } 3563 3564 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale, 3565 float_status *s) 3566 { 3567 FloatParts64 p; 3568 3569 float64_unpack_canonical(&p, a, s); 3570 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s); 3571 } 3572 3573 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale, 3574 float_status *s) 3575 { 3576 FloatParts64 p; 3577 3578 float64_unpack_canonical(&p, a, s); 3579 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); 3580 } 3581 3582 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale, 3583 float_status *s) 3584 { 3585 FloatParts64 p; 3586 3587 float64_unpack_canonical(&p, a, s); 3588 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); 3589 } 3590 3591 uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode, 3592 int scale, float_status *s) 3593 { 3594 FloatParts64 p; 3595 3596 bfloat16_unpack_canonical(&p, a, s); 3597 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s); 3598 } 3599 3600 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode, 3601 int scale, float_status *s) 3602 { 3603 FloatParts64 p; 3604 3605 bfloat16_unpack_canonical(&p, a, s); 3606 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s); 3607 } 3608 3609 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode, 3610 int scale, float_status *s) 3611 { 3612 FloatParts64 p; 3613 3614 bfloat16_unpack_canonical(&p, a, s); 3615 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); 3616 } 3617 3618 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode, 3619 int scale, float_status *s) 3620 { 3621 FloatParts64 p; 3622 3623 bfloat16_unpack_canonical(&p, a, s); 3624 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); 3625 } 3626 3627 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode, 3628 int scale, float_status *s) 3629 { 3630 FloatParts128 p; 3631 3632 float128_unpack_canonical(&p, a, s); 3633 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); 3634 } 3635 3636 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode, 3637 int scale, float_status *s) 3638 { 3639 FloatParts128 p; 3640 3641 float128_unpack_canonical(&p, a, s); 3642 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); 3643 } 3644 3645 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode, 3646 int scale, float_status *s) 3647 { 3648 int flags = 0; 3649 Int128 r; 3650 FloatParts128 p; 3651 3652 float128_unpack_canonical(&p, a, s); 3653 3654 switch (p.cls) { 3655 case float_class_snan: 3656 flags |= float_flag_invalid_snan; 3657 /* fall through */ 3658 case float_class_qnan: 3659 flags |= float_flag_invalid; 3660 r = UINT128_MAX; 3661 break; 3662 3663 case float_class_inf: 3664 flags = float_flag_invalid | float_flag_invalid_cvti; 3665 r = p.sign ? int128_zero() : UINT128_MAX; 3666 break; 3667 3668 case float_class_zero: 3669 return int128_zero(); 3670 3671 case float_class_normal: 3672 case float_class_denormal: 3673 if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) { 3674 flags = float_flag_inexact; 3675 if (p.cls == float_class_zero) { 3676 r = int128_zero(); 3677 break; 3678 } 3679 } 3680 3681 if (p.sign) { 3682 flags = float_flag_invalid | float_flag_invalid_cvti; 3683 r = int128_zero(); 3684 } else if (p.exp <= 127) { 3685 int shift = 127 - p.exp; 3686 r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift); 3687 } else { 3688 flags = float_flag_invalid | float_flag_invalid_cvti; 3689 r = UINT128_MAX; 3690 } 3691 break; 3692 3693 default: 3694 g_assert_not_reached(); 3695 } 3696 3697 float_raise(flags, s); 3698 return r; 3699 } 3700 3701 uint8_t float16_to_uint8(float16 a, float_status *s) 3702 { 3703 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s); 3704 } 3705 3706 uint16_t float16_to_uint16(float16 a, float_status *s) 3707 { 3708 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); 3709 } 3710 3711 uint32_t float16_to_uint32(float16 a, float_status *s) 3712 { 3713 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); 3714 } 3715 3716 uint64_t float16_to_uint64(float16 a, float_status *s) 3717 { 3718 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); 3719 } 3720 3721 uint16_t float32_to_uint16(float32 a, float_status *s) 3722 { 3723 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); 3724 } 3725 3726 uint32_t float32_to_uint32(float32 a, float_status *s) 3727 { 3728 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); 3729 } 3730 3731 uint64_t float32_to_uint64(float32 a, float_status *s) 3732 { 3733 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); 3734 } 3735 3736 uint16_t float64_to_uint16(float64 a, float_status *s) 3737 { 3738 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); 3739 } 3740 3741 uint32_t float64_to_uint32(float64 a, float_status *s) 3742 { 3743 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); 3744 } 3745 3746 uint64_t float64_to_uint64(float64 a, float_status *s) 3747 { 3748 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); 3749 } 3750 3751 uint32_t float128_to_uint32(float128 a, float_status *s) 3752 { 3753 return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); 3754 } 3755 3756 uint64_t float128_to_uint64(float128 a, float_status *s) 3757 { 3758 return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); 3759 } 3760 3761 Int128 float128_to_uint128(float128 a, float_status *s) 3762 { 3763 return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s); 3764 } 3765 3766 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s) 3767 { 3768 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s); 3769 } 3770 3771 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s) 3772 { 3773 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s); 3774 } 3775 3776 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s) 3777 { 3778 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s); 3779 } 3780 3781 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s) 3782 { 3783 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s); 3784 } 3785 3786 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s) 3787 { 3788 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s); 3789 } 3790 3791 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s) 3792 { 3793 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s); 3794 } 3795 3796 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s) 3797 { 3798 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s); 3799 } 3800 3801 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s) 3802 { 3803 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s); 3804 } 3805 3806 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s) 3807 { 3808 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s); 3809 } 3810 3811 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s) 3812 { 3813 return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s); 3814 } 3815 3816 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s) 3817 { 3818 return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s); 3819 } 3820 3821 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s) 3822 { 3823 return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s); 3824 } 3825 3826 uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s) 3827 { 3828 return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s); 3829 } 3830 3831 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s) 3832 { 3833 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); 3834 } 3835 3836 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s) 3837 { 3838 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); 3839 } 3840 3841 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s) 3842 { 3843 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); 3844 } 3845 3846 uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s) 3847 { 3848 return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s); 3849 } 3850 3851 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s) 3852 { 3853 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s); 3854 } 3855 3856 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s) 3857 { 3858 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s); 3859 } 3860 3861 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s) 3862 { 3863 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s); 3864 } 3865 3866 /* 3867 * Signed integer to floating-point conversions 3868 */ 3869 3870 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status) 3871 { 3872 FloatParts64 p; 3873 3874 parts_sint_to_float(&p, a, scale, status); 3875 return float16_round_pack_canonical(&p, status); 3876 } 3877 3878 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status) 3879 { 3880 return int64_to_float16_scalbn(a, scale, status); 3881 } 3882 3883 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status) 3884 { 3885 return int64_to_float16_scalbn(a, scale, status); 3886 } 3887 3888 float16 int64_to_float16(int64_t a, float_status *status) 3889 { 3890 return int64_to_float16_scalbn(a, 0, status); 3891 } 3892 3893 float16 int32_to_float16(int32_t a, float_status *status) 3894 { 3895 return int64_to_float16_scalbn(a, 0, status); 3896 } 3897 3898 float16 int16_to_float16(int16_t a, float_status *status) 3899 { 3900 return int64_to_float16_scalbn(a, 0, status); 3901 } 3902 3903 float16 int8_to_float16(int8_t a, float_status *status) 3904 { 3905 return int64_to_float16_scalbn(a, 0, status); 3906 } 3907 3908 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status) 3909 { 3910 FloatParts64 p; 3911 3912 /* Without scaling, there are no overflow concerns. */ 3913 if (likely(scale == 0) && can_use_fpu(status)) { 3914 union_float32 ur; 3915 ur.h = a; 3916 return ur.s; 3917 } 3918 3919 parts64_sint_to_float(&p, a, scale, status); 3920 return float32_round_pack_canonical(&p, status); 3921 } 3922 3923 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status) 3924 { 3925 return int64_to_float32_scalbn(a, scale, status); 3926 } 3927 3928 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status) 3929 { 3930 return int64_to_float32_scalbn(a, scale, status); 3931 } 3932 3933 float32 int64_to_float32(int64_t a, float_status *status) 3934 { 3935 return int64_to_float32_scalbn(a, 0, status); 3936 } 3937 3938 float32 int32_to_float32(int32_t a, float_status *status) 3939 { 3940 return int64_to_float32_scalbn(a, 0, status); 3941 } 3942 3943 float32 int16_to_float32(int16_t a, float_status *status) 3944 { 3945 return int64_to_float32_scalbn(a, 0, status); 3946 } 3947 3948 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status) 3949 { 3950 FloatParts64 p; 3951 3952 /* Without scaling, there are no overflow concerns. */ 3953 if (likely(scale == 0) && can_use_fpu(status)) { 3954 union_float64 ur; 3955 ur.h = a; 3956 return ur.s; 3957 } 3958 3959 parts_sint_to_float(&p, a, scale, status); 3960 return float64_round_pack_canonical(&p, status); 3961 } 3962 3963 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status) 3964 { 3965 return int64_to_float64_scalbn(a, scale, status); 3966 } 3967 3968 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status) 3969 { 3970 return int64_to_float64_scalbn(a, scale, status); 3971 } 3972 3973 float64 int64_to_float64(int64_t a, float_status *status) 3974 { 3975 return int64_to_float64_scalbn(a, 0, status); 3976 } 3977 3978 float64 int32_to_float64(int32_t a, float_status *status) 3979 { 3980 return int64_to_float64_scalbn(a, 0, status); 3981 } 3982 3983 float64 int16_to_float64(int16_t a, float_status *status) 3984 { 3985 return int64_to_float64_scalbn(a, 0, status); 3986 } 3987 3988 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status) 3989 { 3990 FloatParts64 p; 3991 3992 parts_sint_to_float(&p, a, scale, status); 3993 return bfloat16_round_pack_canonical(&p, status); 3994 } 3995 3996 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status) 3997 { 3998 return int64_to_bfloat16_scalbn(a, scale, status); 3999 } 4000 4001 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status) 4002 { 4003 return int64_to_bfloat16_scalbn(a, scale, status); 4004 } 4005 4006 bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status) 4007 { 4008 return int64_to_bfloat16_scalbn(a, scale, status); 4009 } 4010 4011 bfloat16 int64_to_bfloat16(int64_t a, float_status *status) 4012 { 4013 return int64_to_bfloat16_scalbn(a, 0, status); 4014 } 4015 4016 bfloat16 int32_to_bfloat16(int32_t a, float_status *status) 4017 { 4018 return int64_to_bfloat16_scalbn(a, 0, status); 4019 } 4020 4021 bfloat16 int16_to_bfloat16(int16_t a, float_status *status) 4022 { 4023 return int64_to_bfloat16_scalbn(a, 0, status); 4024 } 4025 4026 bfloat16 int8_to_bfloat16(int8_t a, float_status *status) 4027 { 4028 return int64_to_bfloat16_scalbn(a, 0, status); 4029 } 4030 4031 float128 int128_to_float128(Int128 a, float_status *status) 4032 { 4033 FloatParts128 p = { }; 4034 int shift; 4035 4036 if (int128_nz(a)) { 4037 p.cls = float_class_normal; 4038 if (!int128_nonneg(a)) { 4039 p.sign = true; 4040 a = int128_neg(a); 4041 } 4042 4043 shift = clz64(int128_gethi(a)); 4044 if (shift == 64) { 4045 shift += clz64(int128_getlo(a)); 4046 } 4047 4048 p.exp = 127 - shift; 4049 a = int128_lshift(a, shift); 4050 4051 p.frac_hi = int128_gethi(a); 4052 p.frac_lo = int128_getlo(a); 4053 } else { 4054 p.cls = float_class_zero; 4055 } 4056 4057 return float128_round_pack_canonical(&p, status); 4058 } 4059 4060 float128 int64_to_float128(int64_t a, float_status *status) 4061 { 4062 FloatParts128 p; 4063 4064 parts_sint_to_float(&p, a, 0, status); 4065 return float128_round_pack_canonical(&p, status); 4066 } 4067 4068 float128 int32_to_float128(int32_t a, float_status *status) 4069 { 4070 return int64_to_float128(a, status); 4071 } 4072 4073 floatx80 int64_to_floatx80(int64_t a, float_status *status) 4074 { 4075 FloatParts128 p; 4076 4077 parts_sint_to_float(&p, a, 0, status); 4078 return floatx80_round_pack_canonical(&p, status); 4079 } 4080 4081 floatx80 int32_to_floatx80(int32_t a, float_status *status) 4082 { 4083 return int64_to_floatx80(a, status); 4084 } 4085 4086 /* 4087 * Unsigned Integer to floating-point conversions 4088 */ 4089 4090 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status) 4091 { 4092 FloatParts64 p; 4093 4094 parts_uint_to_float(&p, a, scale, status); 4095 return float16_round_pack_canonical(&p, status); 4096 } 4097 4098 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status) 4099 { 4100 return uint64_to_float16_scalbn(a, scale, status); 4101 } 4102 4103 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status) 4104 { 4105 return uint64_to_float16_scalbn(a, scale, status); 4106 } 4107 4108 float16 uint64_to_float16(uint64_t a, float_status *status) 4109 { 4110 return uint64_to_float16_scalbn(a, 0, status); 4111 } 4112 4113 float16 uint32_to_float16(uint32_t a, float_status *status) 4114 { 4115 return uint64_to_float16_scalbn(a, 0, status); 4116 } 4117 4118 float16 uint16_to_float16(uint16_t a, float_status *status) 4119 { 4120 return uint64_to_float16_scalbn(a, 0, status); 4121 } 4122 4123 float16 uint8_to_float16(uint8_t a, float_status *status) 4124 { 4125 return uint64_to_float16_scalbn(a, 0, status); 4126 } 4127 4128 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status) 4129 { 4130 FloatParts64 p; 4131 4132 /* Without scaling, there are no overflow concerns. */ 4133 if (likely(scale == 0) && can_use_fpu(status)) { 4134 union_float32 ur; 4135 ur.h = a; 4136 return ur.s; 4137 } 4138 4139 parts_uint_to_float(&p, a, scale, status); 4140 return float32_round_pack_canonical(&p, status); 4141 } 4142 4143 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status) 4144 { 4145 return uint64_to_float32_scalbn(a, scale, status); 4146 } 4147 4148 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status) 4149 { 4150 return uint64_to_float32_scalbn(a, scale, status); 4151 } 4152 4153 float32 uint64_to_float32(uint64_t a, float_status *status) 4154 { 4155 return uint64_to_float32_scalbn(a, 0, status); 4156 } 4157 4158 float32 uint32_to_float32(uint32_t a, float_status *status) 4159 { 4160 return uint64_to_float32_scalbn(a, 0, status); 4161 } 4162 4163 float32 uint16_to_float32(uint16_t a, float_status *status) 4164 { 4165 return uint64_to_float32_scalbn(a, 0, status); 4166 } 4167 4168 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status) 4169 { 4170 FloatParts64 p; 4171 4172 /* Without scaling, there are no overflow concerns. */ 4173 if (likely(scale == 0) && can_use_fpu(status)) { 4174 union_float64 ur; 4175 ur.h = a; 4176 return ur.s; 4177 } 4178 4179 parts_uint_to_float(&p, a, scale, status); 4180 return float64_round_pack_canonical(&p, status); 4181 } 4182 4183 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status) 4184 { 4185 return uint64_to_float64_scalbn(a, scale, status); 4186 } 4187 4188 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status) 4189 { 4190 return uint64_to_float64_scalbn(a, scale, status); 4191 } 4192 4193 float64 uint64_to_float64(uint64_t a, float_status *status) 4194 { 4195 return uint64_to_float64_scalbn(a, 0, status); 4196 } 4197 4198 float64 uint32_to_float64(uint32_t a, float_status *status) 4199 { 4200 return uint64_to_float64_scalbn(a, 0, status); 4201 } 4202 4203 float64 uint16_to_float64(uint16_t a, float_status *status) 4204 { 4205 return uint64_to_float64_scalbn(a, 0, status); 4206 } 4207 4208 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status) 4209 { 4210 FloatParts64 p; 4211 4212 parts_uint_to_float(&p, a, scale, status); 4213 return bfloat16_round_pack_canonical(&p, status); 4214 } 4215 4216 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status) 4217 { 4218 return uint64_to_bfloat16_scalbn(a, scale, status); 4219 } 4220 4221 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status) 4222 { 4223 return uint64_to_bfloat16_scalbn(a, scale, status); 4224 } 4225 4226 bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status) 4227 { 4228 return uint64_to_bfloat16_scalbn(a, scale, status); 4229 } 4230 4231 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status) 4232 { 4233 return uint64_to_bfloat16_scalbn(a, 0, status); 4234 } 4235 4236 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status) 4237 { 4238 return uint64_to_bfloat16_scalbn(a, 0, status); 4239 } 4240 4241 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status) 4242 { 4243 return uint64_to_bfloat16_scalbn(a, 0, status); 4244 } 4245 4246 bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status) 4247 { 4248 return uint64_to_bfloat16_scalbn(a, 0, status); 4249 } 4250 4251 float128 uint64_to_float128(uint64_t a, float_status *status) 4252 { 4253 FloatParts128 p; 4254 4255 parts_uint_to_float(&p, a, 0, status); 4256 return float128_round_pack_canonical(&p, status); 4257 } 4258 4259 float128 uint128_to_float128(Int128 a, float_status *status) 4260 { 4261 FloatParts128 p = { }; 4262 int shift; 4263 4264 if (int128_nz(a)) { 4265 p.cls = float_class_normal; 4266 4267 shift = clz64(int128_gethi(a)); 4268 if (shift == 64) { 4269 shift += clz64(int128_getlo(a)); 4270 } 4271 4272 p.exp = 127 - shift; 4273 a = int128_lshift(a, shift); 4274 4275 p.frac_hi = int128_gethi(a); 4276 p.frac_lo = int128_getlo(a); 4277 } else { 4278 p.cls = float_class_zero; 4279 } 4280 4281 return float128_round_pack_canonical(&p, status); 4282 } 4283 4284 /* 4285 * Minimum and maximum 4286 */ 4287 4288 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags) 4289 { 4290 FloatParts64 pa, pb, *pr; 4291 4292 float16_unpack_canonical(&pa, a, s); 4293 float16_unpack_canonical(&pb, b, s); 4294 pr = parts_minmax(&pa, &pb, s, flags); 4295 4296 return float16_round_pack_canonical(pr, s); 4297 } 4298 4299 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b, 4300 float_status *s, int flags) 4301 { 4302 FloatParts64 pa, pb, *pr; 4303 4304 bfloat16_unpack_canonical(&pa, a, s); 4305 bfloat16_unpack_canonical(&pb, b, s); 4306 pr = parts_minmax(&pa, &pb, s, flags); 4307 4308 return bfloat16_round_pack_canonical(pr, s); 4309 } 4310 4311 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags) 4312 { 4313 FloatParts64 pa, pb, *pr; 4314 4315 float32_unpack_canonical(&pa, a, s); 4316 float32_unpack_canonical(&pb, b, s); 4317 pr = parts_minmax(&pa, &pb, s, flags); 4318 4319 return float32_round_pack_canonical(pr, s); 4320 } 4321 4322 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags) 4323 { 4324 FloatParts64 pa, pb, *pr; 4325 4326 float64_unpack_canonical(&pa, a, s); 4327 float64_unpack_canonical(&pb, b, s); 4328 pr = parts_minmax(&pa, &pb, s, flags); 4329 4330 return float64_round_pack_canonical(pr, s); 4331 } 4332 4333 static float128 float128_minmax(float128 a, float128 b, 4334 float_status *s, int flags) 4335 { 4336 FloatParts128 pa, pb, *pr; 4337 4338 float128_unpack_canonical(&pa, a, s); 4339 float128_unpack_canonical(&pb, b, s); 4340 pr = parts_minmax(&pa, &pb, s, flags); 4341 4342 return float128_round_pack_canonical(pr, s); 4343 } 4344 4345 #define MINMAX_1(type, name, flags) \ 4346 type type##_##name(type a, type b, float_status *s) \ 4347 { return type##_minmax(a, b, s, flags); } 4348 4349 #define MINMAX_2(type) \ 4350 MINMAX_1(type, max, 0) \ 4351 MINMAX_1(type, maxnum, minmax_isnum) \ 4352 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \ 4353 MINMAX_1(type, maximum_number, minmax_isnumber) \ 4354 MINMAX_1(type, min, minmax_ismin) \ 4355 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \ 4356 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \ 4357 MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber) \ 4358 4359 MINMAX_2(float16) 4360 MINMAX_2(bfloat16) 4361 MINMAX_2(float32) 4362 MINMAX_2(float64) 4363 MINMAX_2(float128) 4364 4365 #undef MINMAX_1 4366 #undef MINMAX_2 4367 4368 /* 4369 * Floating point compare 4370 */ 4371 4372 static FloatRelation QEMU_FLATTEN 4373 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet) 4374 { 4375 FloatParts64 pa, pb; 4376 4377 float16_unpack_canonical(&pa, a, s); 4378 float16_unpack_canonical(&pb, b, s); 4379 return parts_compare(&pa, &pb, s, is_quiet); 4380 } 4381 4382 FloatRelation float16_compare(float16 a, float16 b, float_status *s) 4383 { 4384 return float16_do_compare(a, b, s, false); 4385 } 4386 4387 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s) 4388 { 4389 return float16_do_compare(a, b, s, true); 4390 } 4391 4392 static FloatRelation QEMU_SOFTFLOAT_ATTR 4393 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet) 4394 { 4395 FloatParts64 pa, pb; 4396 4397 float32_unpack_canonical(&pa, a, s); 4398 float32_unpack_canonical(&pb, b, s); 4399 return parts_compare(&pa, &pb, s, is_quiet); 4400 } 4401 4402 static FloatRelation QEMU_FLATTEN 4403 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet) 4404 { 4405 union_float32 ua, ub; 4406 4407 ua.s = xa; 4408 ub.s = xb; 4409 4410 if (QEMU_NO_HARDFLOAT) { 4411 goto soft; 4412 } 4413 4414 float32_input_flush2(&ua.s, &ub.s, s); 4415 if (isgreaterequal(ua.h, ub.h)) { 4416 if (isgreater(ua.h, ub.h)) { 4417 return float_relation_greater; 4418 } 4419 return float_relation_equal; 4420 } 4421 if (likely(isless(ua.h, ub.h))) { 4422 return float_relation_less; 4423 } 4424 /* 4425 * The only condition remaining is unordered. 4426 * Fall through to set flags. 4427 */ 4428 soft: 4429 return float32_do_compare(ua.s, ub.s, s, is_quiet); 4430 } 4431 4432 FloatRelation float32_compare(float32 a, float32 b, float_status *s) 4433 { 4434 return float32_hs_compare(a, b, s, false); 4435 } 4436 4437 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s) 4438 { 4439 return float32_hs_compare(a, b, s, true); 4440 } 4441 4442 static FloatRelation QEMU_SOFTFLOAT_ATTR 4443 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet) 4444 { 4445 FloatParts64 pa, pb; 4446 4447 float64_unpack_canonical(&pa, a, s); 4448 float64_unpack_canonical(&pb, b, s); 4449 return parts_compare(&pa, &pb, s, is_quiet); 4450 } 4451 4452 static FloatRelation QEMU_FLATTEN 4453 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet) 4454 { 4455 union_float64 ua, ub; 4456 4457 ua.s = xa; 4458 ub.s = xb; 4459 4460 if (QEMU_NO_HARDFLOAT) { 4461 goto soft; 4462 } 4463 4464 float64_input_flush2(&ua.s, &ub.s, s); 4465 if (isgreaterequal(ua.h, ub.h)) { 4466 if (isgreater(ua.h, ub.h)) { 4467 return float_relation_greater; 4468 } 4469 return float_relation_equal; 4470 } 4471 if (likely(isless(ua.h, ub.h))) { 4472 return float_relation_less; 4473 } 4474 /* 4475 * The only condition remaining is unordered. 4476 * Fall through to set flags. 4477 */ 4478 soft: 4479 return float64_do_compare(ua.s, ub.s, s, is_quiet); 4480 } 4481 4482 FloatRelation float64_compare(float64 a, float64 b, float_status *s) 4483 { 4484 return float64_hs_compare(a, b, s, false); 4485 } 4486 4487 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s) 4488 { 4489 return float64_hs_compare(a, b, s, true); 4490 } 4491 4492 static FloatRelation QEMU_FLATTEN 4493 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet) 4494 { 4495 FloatParts64 pa, pb; 4496 4497 bfloat16_unpack_canonical(&pa, a, s); 4498 bfloat16_unpack_canonical(&pb, b, s); 4499 return parts_compare(&pa, &pb, s, is_quiet); 4500 } 4501 4502 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s) 4503 { 4504 return bfloat16_do_compare(a, b, s, false); 4505 } 4506 4507 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s) 4508 { 4509 return bfloat16_do_compare(a, b, s, true); 4510 } 4511 4512 static FloatRelation QEMU_FLATTEN 4513 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet) 4514 { 4515 FloatParts128 pa, pb; 4516 4517 float128_unpack_canonical(&pa, a, s); 4518 float128_unpack_canonical(&pb, b, s); 4519 return parts_compare(&pa, &pb, s, is_quiet); 4520 } 4521 4522 FloatRelation float128_compare(float128 a, float128 b, float_status *s) 4523 { 4524 return float128_do_compare(a, b, s, false); 4525 } 4526 4527 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s) 4528 { 4529 return float128_do_compare(a, b, s, true); 4530 } 4531 4532 static FloatRelation QEMU_FLATTEN 4533 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet) 4534 { 4535 FloatParts128 pa, pb; 4536 4537 if (!floatx80_unpack_canonical(&pa, a, s) || 4538 !floatx80_unpack_canonical(&pb, b, s)) { 4539 return float_relation_unordered; 4540 } 4541 return parts_compare(&pa, &pb, s, is_quiet); 4542 } 4543 4544 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s) 4545 { 4546 return floatx80_do_compare(a, b, s, false); 4547 } 4548 4549 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s) 4550 { 4551 return floatx80_do_compare(a, b, s, true); 4552 } 4553 4554 /* 4555 * Scale by 2**N 4556 */ 4557 4558 float16 float16_scalbn(float16 a, int n, float_status *status) 4559 { 4560 FloatParts64 p; 4561 4562 float16_unpack_canonical(&p, a, status); 4563 parts_scalbn(&p, n, status); 4564 return float16_round_pack_canonical(&p, status); 4565 } 4566 4567 float32 float32_scalbn(float32 a, int n, float_status *status) 4568 { 4569 FloatParts64 p; 4570 4571 float32_unpack_canonical(&p, a, status); 4572 parts_scalbn(&p, n, status); 4573 return float32_round_pack_canonical(&p, status); 4574 } 4575 4576 float64 float64_scalbn(float64 a, int n, float_status *status) 4577 { 4578 FloatParts64 p; 4579 4580 float64_unpack_canonical(&p, a, status); 4581 parts_scalbn(&p, n, status); 4582 return float64_round_pack_canonical(&p, status); 4583 } 4584 4585 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status) 4586 { 4587 FloatParts64 p; 4588 4589 bfloat16_unpack_canonical(&p, a, status); 4590 parts_scalbn(&p, n, status); 4591 return bfloat16_round_pack_canonical(&p, status); 4592 } 4593 4594 float128 float128_scalbn(float128 a, int n, float_status *status) 4595 { 4596 FloatParts128 p; 4597 4598 float128_unpack_canonical(&p, a, status); 4599 parts_scalbn(&p, n, status); 4600 return float128_round_pack_canonical(&p, status); 4601 } 4602 4603 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status) 4604 { 4605 FloatParts128 p; 4606 4607 if (!floatx80_unpack_canonical(&p, a, status)) { 4608 return floatx80_default_nan(status); 4609 } 4610 parts_scalbn(&p, n, status); 4611 return floatx80_round_pack_canonical(&p, status); 4612 } 4613 4614 /* 4615 * Square Root 4616 */ 4617 4618 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status) 4619 { 4620 FloatParts64 p; 4621 4622 float16_unpack_canonical(&p, a, status); 4623 parts_sqrt(&p, status, &float16_params); 4624 return float16_round_pack_canonical(&p, status); 4625 } 4626 4627 static float32 QEMU_SOFTFLOAT_ATTR 4628 soft_f32_sqrt(float32 a, float_status *status) 4629 { 4630 FloatParts64 p; 4631 4632 float32_unpack_canonical(&p, a, status); 4633 parts_sqrt(&p, status, &float32_params); 4634 return float32_round_pack_canonical(&p, status); 4635 } 4636 4637 static float64 QEMU_SOFTFLOAT_ATTR 4638 soft_f64_sqrt(float64 a, float_status *status) 4639 { 4640 FloatParts64 p; 4641 4642 float64_unpack_canonical(&p, a, status); 4643 parts_sqrt(&p, status, &float64_params); 4644 return float64_round_pack_canonical(&p, status); 4645 } 4646 4647 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s) 4648 { 4649 union_float32 ua, ur; 4650 4651 ua.s = xa; 4652 if (unlikely(!can_use_fpu(s))) { 4653 goto soft; 4654 } 4655 4656 float32_input_flush1(&ua.s, s); 4657 if (QEMU_HARDFLOAT_1F32_USE_FP) { 4658 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL || 4659 fpclassify(ua.h) == FP_ZERO) || 4660 signbit(ua.h))) { 4661 goto soft; 4662 } 4663 } else if (unlikely(!float32_is_zero_or_normal(ua.s) || 4664 float32_is_neg(ua.s))) { 4665 goto soft; 4666 } 4667 ur.h = sqrtf(ua.h); 4668 return ur.s; 4669 4670 soft: 4671 return soft_f32_sqrt(ua.s, s); 4672 } 4673 4674 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s) 4675 { 4676 union_float64 ua, ur; 4677 4678 ua.s = xa; 4679 if (unlikely(!can_use_fpu(s))) { 4680 goto soft; 4681 } 4682 4683 float64_input_flush1(&ua.s, s); 4684 if (QEMU_HARDFLOAT_1F64_USE_FP) { 4685 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL || 4686 fpclassify(ua.h) == FP_ZERO) || 4687 signbit(ua.h))) { 4688 goto soft; 4689 } 4690 } else if (unlikely(!float64_is_zero_or_normal(ua.s) || 4691 float64_is_neg(ua.s))) { 4692 goto soft; 4693 } 4694 ur.h = sqrt(ua.h); 4695 return ur.s; 4696 4697 soft: 4698 return soft_f64_sqrt(ua.s, s); 4699 } 4700 4701 float64 float64r32_sqrt(float64 a, float_status *status) 4702 { 4703 FloatParts64 p; 4704 4705 float64_unpack_canonical(&p, a, status); 4706 parts_sqrt(&p, status, &float64_params); 4707 return float64r32_round_pack_canonical(&p, status); 4708 } 4709 4710 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status) 4711 { 4712 FloatParts64 p; 4713 4714 bfloat16_unpack_canonical(&p, a, status); 4715 parts_sqrt(&p, status, &bfloat16_params); 4716 return bfloat16_round_pack_canonical(&p, status); 4717 } 4718 4719 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status) 4720 { 4721 FloatParts128 p; 4722 4723 float128_unpack_canonical(&p, a, status); 4724 parts_sqrt(&p, status, &float128_params); 4725 return float128_round_pack_canonical(&p, status); 4726 } 4727 4728 floatx80 floatx80_sqrt(floatx80 a, float_status *s) 4729 { 4730 FloatParts128 p; 4731 4732 if (!floatx80_unpack_canonical(&p, a, s)) { 4733 return floatx80_default_nan(s); 4734 } 4735 parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]); 4736 return floatx80_round_pack_canonical(&p, s); 4737 } 4738 4739 /* 4740 * log2 4741 */ 4742 float32 float32_log2(float32 a, float_status *status) 4743 { 4744 FloatParts64 p; 4745 4746 float32_unpack_canonical(&p, a, status); 4747 parts_log2(&p, status, &float32_params); 4748 return float32_round_pack_canonical(&p, status); 4749 } 4750 4751 float64 float64_log2(float64 a, float_status *status) 4752 { 4753 FloatParts64 p; 4754 4755 float64_unpack_canonical(&p, a, status); 4756 parts_log2(&p, status, &float64_params); 4757 return float64_round_pack_canonical(&p, status); 4758 } 4759 4760 /*---------------------------------------------------------------------------- 4761 | The pattern for a default generated NaN. 4762 *----------------------------------------------------------------------------*/ 4763 4764 float16 float16_default_nan(float_status *status) 4765 { 4766 FloatParts64 p; 4767 4768 parts_default_nan(&p, status); 4769 p.frac >>= float16_params.frac_shift; 4770 return float16_pack_raw(&p); 4771 } 4772 4773 float32 float32_default_nan(float_status *status) 4774 { 4775 FloatParts64 p; 4776 4777 parts_default_nan(&p, status); 4778 p.frac >>= float32_params.frac_shift; 4779 return float32_pack_raw(&p); 4780 } 4781 4782 float64 float64_default_nan(float_status *status) 4783 { 4784 FloatParts64 p; 4785 4786 parts_default_nan(&p, status); 4787 p.frac >>= float64_params.frac_shift; 4788 return float64_pack_raw(&p); 4789 } 4790 4791 float128 float128_default_nan(float_status *status) 4792 { 4793 FloatParts128 p; 4794 4795 parts_default_nan(&p, status); 4796 frac_shr(&p, float128_params.frac_shift); 4797 return float128_pack_raw(&p); 4798 } 4799 4800 bfloat16 bfloat16_default_nan(float_status *status) 4801 { 4802 FloatParts64 p; 4803 4804 parts_default_nan(&p, status); 4805 p.frac >>= bfloat16_params.frac_shift; 4806 return bfloat16_pack_raw(&p); 4807 } 4808 4809 /*---------------------------------------------------------------------------- 4810 | Returns a quiet NaN from a signalling NaN for the floating point value `a'. 4811 *----------------------------------------------------------------------------*/ 4812 4813 float16 float16_silence_nan(float16 a, float_status *status) 4814 { 4815 FloatParts64 p; 4816 4817 float16_unpack_raw(&p, a); 4818 p.frac <<= float16_params.frac_shift; 4819 parts_silence_nan(&p, status); 4820 p.frac >>= float16_params.frac_shift; 4821 return float16_pack_raw(&p); 4822 } 4823 4824 float32 float32_silence_nan(float32 a, float_status *status) 4825 { 4826 FloatParts64 p; 4827 4828 float32_unpack_raw(&p, a); 4829 p.frac <<= float32_params.frac_shift; 4830 parts_silence_nan(&p, status); 4831 p.frac >>= float32_params.frac_shift; 4832 return float32_pack_raw(&p); 4833 } 4834 4835 float64 float64_silence_nan(float64 a, float_status *status) 4836 { 4837 FloatParts64 p; 4838 4839 float64_unpack_raw(&p, a); 4840 p.frac <<= float64_params.frac_shift; 4841 parts_silence_nan(&p, status); 4842 p.frac >>= float64_params.frac_shift; 4843 return float64_pack_raw(&p); 4844 } 4845 4846 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status) 4847 { 4848 FloatParts64 p; 4849 4850 bfloat16_unpack_raw(&p, a); 4851 p.frac <<= bfloat16_params.frac_shift; 4852 parts_silence_nan(&p, status); 4853 p.frac >>= bfloat16_params.frac_shift; 4854 return bfloat16_pack_raw(&p); 4855 } 4856 4857 float128 float128_silence_nan(float128 a, float_status *status) 4858 { 4859 FloatParts128 p; 4860 4861 float128_unpack_raw(&p, a); 4862 frac_shl(&p, float128_params.frac_shift); 4863 parts_silence_nan(&p, status); 4864 frac_shr(&p, float128_params.frac_shift); 4865 return float128_pack_raw(&p); 4866 } 4867 4868 /*---------------------------------------------------------------------------- 4869 | If `a' is denormal and we are in flush-to-zero mode then set the 4870 | input-denormal exception and return zero. Otherwise just return the value. 4871 *----------------------------------------------------------------------------*/ 4872 4873 static bool parts_squash_denormal(FloatParts64 p, float_status *status) 4874 { 4875 if (p.exp == 0 && p.frac != 0) { 4876 float_raise(float_flag_input_denormal_flushed, status); 4877 return true; 4878 } 4879 4880 return false; 4881 } 4882 4883 float16 float16_squash_input_denormal(float16 a, float_status *status) 4884 { 4885 if (status->flush_inputs_to_zero) { 4886 FloatParts64 p; 4887 4888 float16_unpack_raw(&p, a); 4889 if (parts_squash_denormal(p, status)) { 4890 return float16_set_sign(float16_zero, p.sign); 4891 } 4892 } 4893 return a; 4894 } 4895 4896 float32 float32_squash_input_denormal(float32 a, float_status *status) 4897 { 4898 if (status->flush_inputs_to_zero) { 4899 FloatParts64 p; 4900 4901 float32_unpack_raw(&p, a); 4902 if (parts_squash_denormal(p, status)) { 4903 return float32_set_sign(float32_zero, p.sign); 4904 } 4905 } 4906 return a; 4907 } 4908 4909 float64 float64_squash_input_denormal(float64 a, float_status *status) 4910 { 4911 if (status->flush_inputs_to_zero) { 4912 FloatParts64 p; 4913 4914 float64_unpack_raw(&p, a); 4915 if (parts_squash_denormal(p, status)) { 4916 return float64_set_sign(float64_zero, p.sign); 4917 } 4918 } 4919 return a; 4920 } 4921 4922 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status) 4923 { 4924 if (status->flush_inputs_to_zero) { 4925 FloatParts64 p; 4926 4927 bfloat16_unpack_raw(&p, a); 4928 if (parts_squash_denormal(p, status)) { 4929 return bfloat16_set_sign(bfloat16_zero, p.sign); 4930 } 4931 } 4932 return a; 4933 } 4934 4935 /*---------------------------------------------------------------------------- 4936 | Normalizes the subnormal extended double-precision floating-point value 4937 | represented by the denormalized significand `aSig'. The normalized exponent 4938 | and significand are stored at the locations pointed to by `zExpPtr' and 4939 | `zSigPtr', respectively. 4940 *----------------------------------------------------------------------------*/ 4941 4942 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, 4943 uint64_t *zSigPtr) 4944 { 4945 int8_t shiftCount; 4946 4947 shiftCount = clz64(aSig); 4948 *zSigPtr = aSig<<shiftCount; 4949 *zExpPtr = 1 - shiftCount; 4950 } 4951 4952 /*---------------------------------------------------------------------------- 4953 | Takes two extended double-precision floating-point values `a' and `b', one 4954 | of which is a NaN, and returns the appropriate NaN result. If either `a' or 4955 | `b' is a signaling NaN, the invalid exception is raised. 4956 *----------------------------------------------------------------------------*/ 4957 4958 floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status) 4959 { 4960 FloatParts128 pa, pb, *pr; 4961 4962 if (!floatx80_unpack_canonical(&pa, a, status) || 4963 !floatx80_unpack_canonical(&pb, b, status)) { 4964 return floatx80_default_nan(status); 4965 } 4966 4967 pr = parts_pick_nan(&pa, &pb, status); 4968 return floatx80_round_pack_canonical(pr, status); 4969 } 4970 4971 /*---------------------------------------------------------------------------- 4972 | Takes an abstract floating-point value having sign `zSign', exponent `zExp', 4973 | and extended significand formed by the concatenation of `zSig0' and `zSig1', 4974 | and returns the proper extended double-precision floating-point value 4975 | corresponding to the abstract input. Ordinarily, the abstract value is 4976 | rounded and packed into the extended double-precision format, with the 4977 | inexact exception raised if the abstract input cannot be represented 4978 | exactly. However, if the abstract value is too large, the overflow and 4979 | inexact exceptions are raised and an infinity or maximal finite value is 4980 | returned. If the abstract value is too small, the input value is rounded to 4981 | a subnormal number, and the underflow and inexact exceptions are raised if 4982 | the abstract input cannot be represented exactly as a subnormal extended 4983 | double-precision floating-point number. 4984 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d, 4985 | the result is rounded to the same number of bits as single or double 4986 | precision, respectively. Otherwise, the result is rounded to the full 4987 | precision of the extended double-precision format. 4988 | The input significand must be normalized or smaller. If the input 4989 | significand is not normalized, `zExp' must be 0; in that case, the result 4990 | returned is a subnormal number, and it must not require rounding. The 4991 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary 4992 | Floating-Point Arithmetic. 4993 *----------------------------------------------------------------------------*/ 4994 4995 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign, 4996 int32_t zExp, uint64_t zSig0, uint64_t zSig1, 4997 float_status *status) 4998 { 4999 FloatRoundMode roundingMode; 5000 bool roundNearestEven, increment, isTiny; 5001 int64_t roundIncrement, roundMask, roundBits; 5002 5003 roundingMode = status->float_rounding_mode; 5004 roundNearestEven = ( roundingMode == float_round_nearest_even ); 5005 switch (roundingPrecision) { 5006 case floatx80_precision_x: 5007 goto precision80; 5008 case floatx80_precision_d: 5009 roundIncrement = UINT64_C(0x0000000000000400); 5010 roundMask = UINT64_C(0x00000000000007FF); 5011 break; 5012 case floatx80_precision_s: 5013 roundIncrement = UINT64_C(0x0000008000000000); 5014 roundMask = UINT64_C(0x000000FFFFFFFFFF); 5015 break; 5016 default: 5017 g_assert_not_reached(); 5018 } 5019 zSig0 |= ( zSig1 != 0 ); 5020 switch (roundingMode) { 5021 case float_round_nearest_even: 5022 case float_round_ties_away: 5023 break; 5024 case float_round_to_zero: 5025 roundIncrement = 0; 5026 break; 5027 case float_round_up: 5028 roundIncrement = zSign ? 0 : roundMask; 5029 break; 5030 case float_round_down: 5031 roundIncrement = zSign ? roundMask : 0; 5032 break; 5033 default: 5034 abort(); 5035 } 5036 roundBits = zSig0 & roundMask; 5037 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { 5038 if ( ( 0x7FFE < zExp ) 5039 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) 5040 ) { 5041 goto overflow; 5042 } 5043 if ( zExp <= 0 ) { 5044 if (status->flush_to_zero) { 5045 float_raise(float_flag_output_denormal_flushed, status); 5046 return packFloatx80(zSign, 0, 0); 5047 } 5048 isTiny = status->tininess_before_rounding 5049 || (zExp < 0 ) 5050 || (zSig0 <= zSig0 + roundIncrement); 5051 shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); 5052 zExp = 0; 5053 roundBits = zSig0 & roundMask; 5054 if (isTiny && roundBits) { 5055 float_raise(float_flag_underflow, status); 5056 } 5057 if (roundBits) { 5058 float_raise(float_flag_inexact, status); 5059 } 5060 zSig0 += roundIncrement; 5061 if ( (int64_t) zSig0 < 0 ) zExp = 1; 5062 roundIncrement = roundMask + 1; 5063 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { 5064 roundMask |= roundIncrement; 5065 } 5066 zSig0 &= ~ roundMask; 5067 return packFloatx80( zSign, zExp, zSig0 ); 5068 } 5069 } 5070 if (roundBits) { 5071 float_raise(float_flag_inexact, status); 5072 } 5073 zSig0 += roundIncrement; 5074 if ( zSig0 < roundIncrement ) { 5075 ++zExp; 5076 zSig0 = UINT64_C(0x8000000000000000); 5077 } 5078 roundIncrement = roundMask + 1; 5079 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { 5080 roundMask |= roundIncrement; 5081 } 5082 zSig0 &= ~ roundMask; 5083 if ( zSig0 == 0 ) zExp = 0; 5084 return packFloatx80( zSign, zExp, zSig0 ); 5085 precision80: 5086 switch (roundingMode) { 5087 case float_round_nearest_even: 5088 case float_round_ties_away: 5089 increment = ((int64_t)zSig1 < 0); 5090 break; 5091 case float_round_to_zero: 5092 increment = 0; 5093 break; 5094 case float_round_up: 5095 increment = !zSign && zSig1; 5096 break; 5097 case float_round_down: 5098 increment = zSign && zSig1; 5099 break; 5100 default: 5101 abort(); 5102 } 5103 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { 5104 if ( ( 0x7FFE < zExp ) 5105 || ( ( zExp == 0x7FFE ) 5106 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) ) 5107 && increment 5108 ) 5109 ) { 5110 roundMask = 0; 5111 overflow: 5112 float_raise(float_flag_overflow | float_flag_inexact, status); 5113 if ( ( roundingMode == float_round_to_zero ) 5114 || ( zSign && ( roundingMode == float_round_up ) ) 5115 || ( ! zSign && ( roundingMode == float_round_down ) ) 5116 ) { 5117 return packFloatx80( zSign, 0x7FFE, ~ roundMask ); 5118 } 5119 return packFloatx80(zSign, 5120 floatx80_infinity_high, 5121 floatx80_infinity_low); 5122 } 5123 if ( zExp <= 0 ) { 5124 isTiny = status->tininess_before_rounding 5125 || (zExp < 0) 5126 || !increment 5127 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF)); 5128 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); 5129 zExp = 0; 5130 if (isTiny && zSig1) { 5131 float_raise(float_flag_underflow, status); 5132 } 5133 if (zSig1) { 5134 float_raise(float_flag_inexact, status); 5135 } 5136 switch (roundingMode) { 5137 case float_round_nearest_even: 5138 case float_round_ties_away: 5139 increment = ((int64_t)zSig1 < 0); 5140 break; 5141 case float_round_to_zero: 5142 increment = 0; 5143 break; 5144 case float_round_up: 5145 increment = !zSign && zSig1; 5146 break; 5147 case float_round_down: 5148 increment = zSign && zSig1; 5149 break; 5150 default: 5151 abort(); 5152 } 5153 if ( increment ) { 5154 ++zSig0; 5155 if (!(zSig1 << 1) && roundNearestEven) { 5156 zSig0 &= ~1; 5157 } 5158 if ( (int64_t) zSig0 < 0 ) zExp = 1; 5159 } 5160 return packFloatx80( zSign, zExp, zSig0 ); 5161 } 5162 } 5163 if (zSig1) { 5164 float_raise(float_flag_inexact, status); 5165 } 5166 if ( increment ) { 5167 ++zSig0; 5168 if ( zSig0 == 0 ) { 5169 ++zExp; 5170 zSig0 = UINT64_C(0x8000000000000000); 5171 } 5172 else { 5173 if (!(zSig1 << 1) && roundNearestEven) { 5174 zSig0 &= ~1; 5175 } 5176 } 5177 } 5178 else { 5179 if ( zSig0 == 0 ) zExp = 0; 5180 } 5181 return packFloatx80( zSign, zExp, zSig0 ); 5182 5183 } 5184 5185 /*---------------------------------------------------------------------------- 5186 | Takes an abstract floating-point value having sign `zSign', exponent 5187 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1', 5188 | and returns the proper extended double-precision floating-point value 5189 | corresponding to the abstract input. This routine is just like 5190 | `roundAndPackFloatx80' except that the input significand does not have to be 5191 | normalized. 5192 *----------------------------------------------------------------------------*/ 5193 5194 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, 5195 bool zSign, int32_t zExp, 5196 uint64_t zSig0, uint64_t zSig1, 5197 float_status *status) 5198 { 5199 int8_t shiftCount; 5200 5201 if ( zSig0 == 0 ) { 5202 zSig0 = zSig1; 5203 zSig1 = 0; 5204 zExp -= 64; 5205 } 5206 shiftCount = clz64(zSig0); 5207 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 5208 zExp -= shiftCount; 5209 return roundAndPackFloatx80(roundingPrecision, zSign, zExp, 5210 zSig0, zSig1, status); 5211 5212 } 5213 5214 /*---------------------------------------------------------------------------- 5215 | Returns the binary exponential of the single-precision floating-point value 5216 | `a'. The operation is performed according to the IEC/IEEE Standard for 5217 | Binary Floating-Point Arithmetic. 5218 | 5219 | Uses the following identities: 5220 | 5221 | 1. ------------------------------------------------------------------------- 5222 | x x*ln(2) 5223 | 2 = e 5224 | 5225 | 2. ------------------------------------------------------------------------- 5226 | 2 3 4 5 n 5227 | x x x x x x x 5228 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ... 5229 | 1! 2! 3! 4! 5! n! 5230 *----------------------------------------------------------------------------*/ 5231 5232 static const float64 float32_exp2_coefficients[15] = 5233 { 5234 const_float64( 0x3ff0000000000000ll ), /* 1 */ 5235 const_float64( 0x3fe0000000000000ll ), /* 2 */ 5236 const_float64( 0x3fc5555555555555ll ), /* 3 */ 5237 const_float64( 0x3fa5555555555555ll ), /* 4 */ 5238 const_float64( 0x3f81111111111111ll ), /* 5 */ 5239 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */ 5240 const_float64( 0x3f2a01a01a01a01all ), /* 7 */ 5241 const_float64( 0x3efa01a01a01a01all ), /* 8 */ 5242 const_float64( 0x3ec71de3a556c734ll ), /* 9 */ 5243 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */ 5244 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */ 5245 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */ 5246 const_float64( 0x3de6124613a86d09ll ), /* 13 */ 5247 const_float64( 0x3da93974a8c07c9dll ), /* 14 */ 5248 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */ 5249 }; 5250 5251 float32 float32_exp2(float32 a, float_status *status) 5252 { 5253 FloatParts64 xp, xnp, tp, rp; 5254 int i; 5255 5256 float32_unpack_canonical(&xp, a, status); 5257 if (unlikely(xp.cls != float_class_normal)) { 5258 switch (xp.cls) { 5259 case float_class_denormal: 5260 break; 5261 case float_class_snan: 5262 case float_class_qnan: 5263 parts_return_nan(&xp, status); 5264 return float32_round_pack_canonical(&xp, status); 5265 case float_class_inf: 5266 return xp.sign ? float32_zero : a; 5267 case float_class_zero: 5268 return float32_one; 5269 default: 5270 g_assert_not_reached(); 5271 } 5272 } 5273 5274 float_raise(float_flag_inexact, status); 5275 5276 float64_unpack_canonical(&tp, float64_ln2, status); 5277 xp = *parts_mul(&xp, &tp, status); 5278 xnp = xp; 5279 5280 float64_unpack_canonical(&rp, float64_one, status); 5281 for (i = 0 ; i < 15 ; i++) { 5282 5283 float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status); 5284 rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status); 5285 xnp = *parts_mul(&xnp, &xp, status); 5286 } 5287 5288 return float32_round_pack_canonical(&rp, status); 5289 } 5290 5291 /*---------------------------------------------------------------------------- 5292 | Rounds the extended double-precision floating-point value `a' 5293 | to the precision provided by floatx80_rounding_precision and returns the 5294 | result as an extended double-precision floating-point value. 5295 | The operation is performed according to the IEC/IEEE Standard for Binary 5296 | Floating-Point Arithmetic. 5297 *----------------------------------------------------------------------------*/ 5298 5299 floatx80 floatx80_round(floatx80 a, float_status *status) 5300 { 5301 FloatParts128 p; 5302 5303 if (!floatx80_unpack_canonical(&p, a, status)) { 5304 return floatx80_default_nan(status); 5305 } 5306 return floatx80_round_pack_canonical(&p, status); 5307 } 5308 5309 static void __attribute__((constructor)) softfloat_init(void) 5310 { 5311 union_float64 ua, ub, uc, ur; 5312 5313 if (QEMU_NO_HARDFLOAT) { 5314 return; 5315 } 5316 /* 5317 * Test that the host's FMA is not obviously broken. For example, 5318 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see 5319 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304 5320 */ 5321 ua.s = 0x0020000000000001ULL; 5322 ub.s = 0x3ca0000000000000ULL; 5323 uc.s = 0x0020000000000000ULL; 5324 ur.h = fma(ua.h, ub.h, uc.h); 5325 if (ur.s != 0x0020000000000001ULL) { 5326 force_soft_fma = true; 5327 } 5328 } 5329