/* xref: /qemu/fpu/softfloat.c (revision 09951f5a27a7f8633118c1808cf17e66b30c3c62) */
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 #include "qemu/osdep.h"
83 #include <math.h>
84 #include "qemu/bitops.h"
85 #include "fpu/softfloat.h"
86 
87 /* We only need stdlib for abort() */
88 
89 /*----------------------------------------------------------------------------
90 | Primitive arithmetic functions, including multi-word arithmetic, and
91 | division and square root approximations.  (Can be specialized to target if
92 | desired.)
93 *----------------------------------------------------------------------------*/
94 #include "fpu/softfloat-macros.h"
95 
96 /*
97  * Hardfloat
98  *
99  * Fast emulation of guest FP instructions is challenging for two reasons.
100  * First, FP instruction semantics are similar but not identical, particularly
101  * when handling NaNs. Second, emulating at reasonable speed the guest FP
102  * exception flags is not trivial: reading the host's flags register with a
103  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
104  * and trapping on every FP exception is not fast nor pleasant to work with.
105  *
106  * We address these challenges by leveraging the host FPU for a subset of the
107  * operations. To do this we expand on the idea presented in this paper:
108  *
109  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
110  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
111  *
112  * The idea is thus to leverage the host FPU to (1) compute FP operations
113  * and (2) identify whether FP exceptions occurred while avoiding
114  * expensive exception flag register accesses.
115  *
116  * An important optimization shown in the paper is that given that exception
117  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
118  * This is particularly useful for the inexact flag, which is very frequently
119  * raised in floating-point workloads.
120  *
121  * We optimize the code further by deferring to soft-fp whenever FP exception
122  * detection might get hairy. Two examples: (1) when at least one operand is
123  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
124  * and the result is < the minimum normal.
125  */
126 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
127     static inline void name(soft_t *a, float_status *s)                 \
128     {                                                                   \
129         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
130             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
131                                      soft_t ## _is_neg(*a));            \
132             float_raise(float_flag_input_denormal_flushed, s);          \
133         }                                                               \
134     }
135 
GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck,float32)136 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
137 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
138 #undef GEN_INPUT_FLUSH__NOCHECK
139 
140 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
141     static inline void name(soft_t *a, float_status *s) \
142     {                                                   \
143         if (likely(!s->flush_inputs_to_zero)) {         \
144             return;                                     \
145         }                                               \
146         soft_t ## _input_flush__nocheck(a, s);          \
147     }
148 
149 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
150 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
151 #undef GEN_INPUT_FLUSH1
152 
153 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
154     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
155     {                                                                   \
156         if (likely(!s->flush_inputs_to_zero)) {                         \
157             return;                                                     \
158         }                                                               \
159         soft_t ## _input_flush__nocheck(a, s);                          \
160         soft_t ## _input_flush__nocheck(b, s);                          \
161     }
162 
163 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
164 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
165 #undef GEN_INPUT_FLUSH2
166 
/*
 * Three-operand wrapper: test flush_inputs_to_zero once, then flush
 * each of the three operands via the unchecked helper.
 */
#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
181 
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
229 
230 static inline bool can_use_fpu(const float_status *s)
231 {
232     if (QEMU_NO_HARDFLOAT) {
233         return false;
234     }
235     return likely(s->float_exception_flags & float_flag_inexact &&
236                   s->float_rounding_mode == float_round_nearest_even);
237 }
238 
/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

/* View the same 32 bits as a softfloat float32 or a host float. */
typedef union {
    float32 s;
    float h;
} union_float32;

/* View the same 64 bits as a softfloat float64 or a host double. */
typedef union {
    float64 s;
    double h;
} union_float64;

/* Predicates used as the pre/post checks around a hardfloat op. */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Softfloat and host-FPU implementations of a 2-operand operation. */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);
268 
269 /* 2-input is-zero-or-normal */
f32_is_zon2(union_float32 a,union_float32 b)270 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
271 {
272     if (QEMU_HARDFLOAT_2F32_USE_FP) {
273         /*
274          * Not using a temp variable for consecutive fpclassify calls ends up
275          * generating faster code.
276          */
277         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
278                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
279     }
280     return float32_is_zero_or_normal(a.s) &&
281            float32_is_zero_or_normal(b.s);
282 }
283 
f64_is_zon2(union_float64 a,union_float64 b)284 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
285 {
286     if (QEMU_HARDFLOAT_2F64_USE_FP) {
287         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
288                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
289     }
290     return float64_is_zero_or_normal(a.s) &&
291            float64_is_zero_or_normal(b.s);
292 }
293 
294 /* 3-input is-zero-or-normal */
295 static inline
f32_is_zon3(union_float32 a,union_float32 b,union_float32 c)296 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
297 {
298     if (QEMU_HARDFLOAT_3F32_USE_FP) {
299         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
300                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
301                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
302     }
303     return float32_is_zero_or_normal(a.s) &&
304            float32_is_zero_or_normal(b.s) &&
305            float32_is_zero_or_normal(c.s);
306 }
307 
308 static inline
f64_is_zon3(union_float64 a,union_float64 b,union_float64 c)309 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
310 {
311     if (QEMU_HARDFLOAT_3F64_USE_FP) {
312         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
313                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
314                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
315     }
316     return float64_is_zero_or_normal(a.s) &&
317            float64_is_zero_or_normal(b.s) &&
318            float64_is_zero_or_normal(c.s);
319 }
320 
f32_is_inf(union_float32 a)321 static inline bool f32_is_inf(union_float32 a)
322 {
323     if (QEMU_HARDFLOAT_USE_ISINF) {
324         return isinf(a.h);
325     }
326     return float32_is_infinity(a.s);
327 }
328 
f64_is_inf(union_float64 a)329 static inline bool f64_is_inf(union_float64 a)
330 {
331     if (QEMU_HARDFLOAT_USE_ISINF) {
332         return isinf(a.h);
333     }
334     return float64_is_infinity(a.s);
335 }
336 
337 static inline float32
float32_gen2(float32 xa,float32 xb,float_status * s,hard_f32_op2_fn hard,soft_f32_op2_fn soft,f32_check_fn pre,f32_check_fn post)338 float32_gen2(float32 xa, float32 xb, float_status *s,
339              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
340              f32_check_fn pre, f32_check_fn post)
341 {
342     union_float32 ua, ub, ur;
343 
344     ua.s = xa;
345     ub.s = xb;
346 
347     if (unlikely(!can_use_fpu(s))) {
348         goto soft;
349     }
350 
351     float32_input_flush2(&ua.s, &ub.s, s);
352     if (unlikely(!pre(ua, ub))) {
353         goto soft;
354     }
355 
356     ur.h = hard(ua.h, ub.h);
357     if (unlikely(f32_is_inf(ur))) {
358         float_raise(float_flag_overflow, s);
359     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
360         goto soft;
361     }
362     return ur.s;
363 
364  soft:
365     return soft(ua.s, ub.s, s);
366 }
367 
368 static inline float64
float64_gen2(float64 xa,float64 xb,float_status * s,hard_f64_op2_fn hard,soft_f64_op2_fn soft,f64_check_fn pre,f64_check_fn post)369 float64_gen2(float64 xa, float64 xb, float_status *s,
370              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
371              f64_check_fn pre, f64_check_fn post)
372 {
373     union_float64 ua, ub, ur;
374 
375     ua.s = xa;
376     ub.s = xb;
377 
378     if (unlikely(!can_use_fpu(s))) {
379         goto soft;
380     }
381 
382     float64_input_flush2(&ua.s, &ub.s, s);
383     if (unlikely(!pre(ua, ub))) {
384         goto soft;
385     }
386 
387     ur.h = hard(ua.h, ub.h);
388     if (unlikely(f64_is_inf(ur))) {
389         float_raise(float_flag_overflow, s);
390     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
391         goto soft;
392     }
393     return ur.s;
394 
395  soft:
396     return soft(ua.s, ub.s, s);
397 }
398 
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 *
 * Note that we canonicalize denormals, so most code should treat
 * class_normal and class_denormal identically.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_denormal, /* input was a non-squashed denormal */
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

/* Turn a FloatClass value into a single bit, for use in class masks. */
#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_denormal = float_cmask(float_class_denormal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    /* Common combinations. */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
    float_cmask_anynorm = float_cmask_normal | float_cmask_denormal,
};
431 
/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 8,
};
446 
447 /* Simple helpers for checking if, or what kind of, NaN we have */
is_nan(FloatClass c)448 static inline __attribute__((unused)) bool is_nan(FloatClass c)
449 {
450     return unlikely(c >= float_class_qnan);
451 }
452 
is_snan(FloatClass c)453 static inline __attribute__((unused)) bool is_snan(FloatClass c)
454 {
455     return c == float_class_snan;
456 }
457 
is_qnan(FloatClass c)458 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
459 {
460     return c == float_class_qnan;
461 }
462 
463 /*
464  * Return true if the float_cmask has only normals in it
465  * (including input denormals that were canonicalized)
466  */
cmask_is_only_normals(int cmask)467 static inline bool cmask_is_only_normals(int cmask)
468 {
469     return !(cmask & ~float_cmask_anynorm);
470 }
471 
is_anynorm(FloatClass c)472 static inline bool is_anynorm(FloatClass c)
473 {
474     return float_cmask(c) & float_cmask_anynorm;
475 }
476 
/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

/* Two-word fraction (also used for floatx80). */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

/* Four-word fraction, for double-width intermediate results. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
524 
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_re_bias: NOTE(review): not described upstream; an alternate
 *   exponent bias — confirm semantics at its use sites
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based the size of fraction
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 *   has_explicit_bit: has an explicit integer bit; this affects whether
 *   the float_status floatx80_behaviour handling applies
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_re_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    bool has_explicit_bit;
    uint64_t round_mask;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_re_bias    = (1 << (E - 1)) + (1 << (E - 2)),  \
    .exp_max        = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1
562 
/* IEEE half precision: 5 exponent bits, 10 fraction bits. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* Half precision with the ARM Alternative Half Precision modifier. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* bfloat16: float32 exponent range, 7 fraction bits. */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

/*
 * floatx80 at rounding precision R (fraction bits kept).  For R == 64
 * the full 63-bit stored fraction is used and round_mask covers no bits.
 */
#define FLOATX80_PARAMS(R)              \
    FLOAT_PARAMS_(15),                  \
    .frac_size = R == 64 ? 63 : R,      \
    .frac_shift = 0,                    \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = {
        FLOATX80_PARAMS(64),
        .has_explicit_bit = true,
    },
};
602 
603 /* Unpack a float to parts, but do not canonicalize.  */
unpack_raw64(FloatParts64 * r,const FloatFmt * fmt,uint64_t raw)604 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
605 {
606     const int f_size = fmt->frac_size;
607     const int e_size = fmt->exp_size;
608 
609     *r = (FloatParts64) {
610         .cls = float_class_unclassified,
611         .sign = extract64(raw, f_size + e_size, 1),
612         .exp = extract64(raw, f_size, e_size),
613         .frac = extract64(raw, 0, f_size)
614     };
615 }
616 
float16_unpack_raw(FloatParts64 * p,float16 f)617 static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
618 {
619     unpack_raw64(p, &float16_params, f);
620 }
621 
bfloat16_unpack_raw(FloatParts64 * p,bfloat16 f)622 static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
623 {
624     unpack_raw64(p, &bfloat16_params, f);
625 }
626 
float32_unpack_raw(FloatParts64 * p,float32 f)627 static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
628 {
629     unpack_raw64(p, &float32_params, f);
630 }
631 
float64_unpack_raw(FloatParts64 * p,float64 f)632 static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
633 {
634     unpack_raw64(p, &float64_params, f);
635 }
636 
floatx80_unpack_raw(FloatParts128 * p,floatx80 f)637 static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
638 {
639     *p = (FloatParts128) {
640         .cls = float_class_unclassified,
641         .sign = extract32(f.high, 15, 1),
642         .exp = extract32(f.high, 0, 15),
643         .frac_hi = f.low
644     };
645 }
646 
float128_unpack_raw(FloatParts128 * p,float128 f)647 static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
648 {
649     const int f_size = float128_params.frac_size - 64;
650     const int e_size = float128_params.exp_size;
651 
652     *p = (FloatParts128) {
653         .cls = float_class_unclassified,
654         .sign = extract64(f.high, f_size + e_size, 1),
655         .exp = extract64(f.high, f_size, e_size),
656         .frac_hi = extract64(f.high, 0, f_size),
657         .frac_lo = f.low,
658     };
659 }
660 
661 /* Pack a float from parts, but do not canonicalize.  */
pack_raw64(const FloatParts64 * p,const FloatFmt * fmt)662 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
663 {
664     const int f_size = fmt->frac_size;
665     const int e_size = fmt->exp_size;
666     uint64_t ret;
667 
668     ret = (uint64_t)p->sign << (f_size + e_size);
669     ret = deposit64(ret, f_size, e_size, p->exp);
670     ret = deposit64(ret, 0, f_size, p->frac);
671     return ret;
672 }
673 
float16_pack_raw(const FloatParts64 * p)674 static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
675 {
676     return make_float16(pack_raw64(p, &float16_params));
677 }
678 
bfloat16_pack_raw(const FloatParts64 * p)679 static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
680 {
681     return pack_raw64(p, &bfloat16_params);
682 }
683 
float32_pack_raw(const FloatParts64 * p)684 static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
685 {
686     return make_float32(pack_raw64(p, &float32_params));
687 }
688 
float64_pack_raw(const FloatParts64 * p)689 static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
690 {
691     return make_float64(pack_raw64(p, &float64_params));
692 }
693 
float128_pack_raw(const FloatParts128 * p)694 static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
695 {
696     const int f_size = float128_params.frac_size - 64;
697     const int e_size = float128_params.exp_size;
698     uint64_t hi;
699 
700     hi = (uint64_t)p->sign << (f_size + e_size);
701     hi = deposit64(hi, f_size, e_size, p->exp);
702     hi = deposit64(hi, 0, f_size, p->frac_hi);
703     return make_float128(hi, p->frac_lo);
704 }
705 
706 /*----------------------------------------------------------------------------
707 | Functions and definitions to determine:  (1) whether tininess for underflow
708 | is detected before or after rounding by default, (2) what (if anything)
709 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
710 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
711 | are propagated from function inputs to output.  These details are target-
712 | specific.
713 *----------------------------------------------------------------------------*/
714 #include "softfloat-specialize.c.inc"
715 
/*
 * Dispatch a parts_* operation to the width-specific implementation
 * selected (at compile time) by the FloatPartsN pointer type.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME, \
                  FloatParts256 *: parts256_##NAME)

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
                                           FloatParts64 *c, int scale,
                                           int flags, float_status *s);
static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
                                             FloatParts128 *c, int scale,
                                             int flags, float_status *s);

#define parts_muladd_scalbn(A, B, C, Z, Y, S) \
    PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)
815 
816 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
817                                  float_status *s);
818 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
819                                    float_status *s);
820 
821 #define parts_div(A, B, S) \
822     PARTS_GENERIC_64_128(div, A)(A, B, S)
823 
824 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
825                                     uint64_t *mod_quot, float_status *s);
826 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
827                                       uint64_t *mod_quot, float_status *s);
828 
829 #define parts_modrem(A, B, Q, S) \
830     PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
831 
832 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
833 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
834 
835 #define parts_sqrt(A, S, F) \
836     PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
837 
838 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
839                                         int scale, int frac_size);
840 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
841                                          int scale, int frac_size);
842 
843 #define parts_round_to_int_normal(A, R, C, F) \
844     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
845 
846 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
847                                  int scale, float_status *s,
848                                  const FloatFmt *fmt);
849 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
850                                   int scale, float_status *s,
851                                   const FloatFmt *fmt);
852 
853 #define parts_round_to_int(A, R, C, S, F) \
854     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
855 
856 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
857                                      int scale, int64_t min, int64_t max,
858                                      float_status *s);
859 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
860                                      int scale, int64_t min, int64_t max,
861                                      float_status *s);
862 
863 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
864     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
865 
866 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
867                                       int scale, uint64_t max,
868                                       float_status *s);
869 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
870                                        int scale, uint64_t max,
871                                        float_status *s);
872 
873 #define parts_float_to_uint(P, R, Z, M, S) \
874     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
875 
876 static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
877                                             FloatRoundMode rmode,
878                                             int bitsm1, float_status *s);
879 static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
880                                              FloatRoundMode rmode,
881                                              int bitsm1, float_status *s);
882 
883 #define parts_float_to_sint_modulo(P, R, M, S) \
884     PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)
885 
886 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
887                                   int scale, float_status *s);
888 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
889                                    int scale, float_status *s);
890 
/* parts_float_to_sint is defined above, alongside the other
 * float_to_sint declarations; a redundant identical definition
 * is not repeated here. */
893 
894 #define parts_sint_to_float(P, I, Z, S) \
895     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
896 
897 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
898                                   int scale, float_status *s);
899 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
900                                    int scale, float_status *s);
901 
902 #define parts_uint_to_float(P, I, Z, S) \
903     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
904 
905 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
906                                     float_status *s, int flags);
907 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
908                                       float_status *s, int flags);
909 
910 #define parts_minmax(A, B, S, F) \
911     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
912 
913 static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
914                                      float_status *s, bool q);
915 static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
916                                       float_status *s, bool q);
917 
918 #define parts_compare(A, B, S, Q) \
919     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
920 
921 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
922 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
923 
924 #define parts_scalbn(A, N, S) \
925     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
926 
927 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
928 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
929 
930 #define parts_log2(A, S, F) \
931     PARTS_GENERIC_64_128(log2, A)(A, S, F)
932 
933 /*
934  * Helper functions for softfloat-parts.c.inc, per-size operations.
935  */
936 
937 #define FRAC_GENERIC_64_128(NAME, P) \
938     _Generic((P), FloatParts64 *: frac64_##NAME, \
939                   FloatParts128 *: frac128_##NAME)
940 
941 #define FRAC_GENERIC_64_128_256(NAME, P) \
942     _Generic((P), FloatParts64 *: frac64_##NAME, \
943                   FloatParts128 *: frac128_##NAME, \
944                   FloatParts256 *: frac256_##NAME)
945 
/* 64-bit fraction addition: R = A + B; returns the carry out. */
static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    uint64_t sum;
    bool carry = uadd64_overflow(a->frac, b->frac, &sum);

    r->frac = sum;
    return carry;
}
950 
/* 128-bit fraction addition: R = A + B; returns the carry out. */
static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool carry = 0;

    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &carry);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &carry);
    return carry;
}
958 
/* 256-bit fraction addition: R = A + B; returns the carry out. */
static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool carry = 0;

    /* Propagate the carry from the least to the most significant word. */
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &carry);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &carry);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &carry);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &carry);
    return carry;
}
968 
969 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
970 
/* Add the 64-bit constant C to A's fraction; returns the carry out. */
static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    uint64_t sum;
    bool carry = uadd64_overflow(a->frac, c, &sum);

    r->frac = sum;
    return carry;
}
975 
/* Add the 64-bit constant C to A's 128-bit fraction; returns the carry out. */
static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    bool carry = uadd64_overflow(a->frac_lo, c, &r->frac_lo);

    return uadd64_overflow(a->frac_hi, carry, &r->frac_hi);
}
981 
982 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
983 
/* Fill A's fraction with all one bits. */
static void frac64_allones(FloatParts64 *a)
{
    a->frac = ~(uint64_t)0;
}
988 
/* Fill both words of A's 128-bit fraction with all one bits. */
static void frac128_allones(FloatParts128 *a)
{
    a->frac_lo = ~(uint64_t)0;
    a->frac_hi = ~(uint64_t)0;
}
993 
994 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
995 
frac64_cmp(FloatParts64 * a,FloatParts64 * b)996 static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
997 {
998     return (a->frac == b->frac ? float_relation_equal
999             : a->frac < b->frac ? float_relation_less
1000             : float_relation_greater);
1001 }
1002 
frac128_cmp(FloatParts128 * a,FloatParts128 * b)1003 static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
1004 {
1005     uint64_t ta = a->frac_hi, tb = b->frac_hi;
1006     if (ta == tb) {
1007         ta = a->frac_lo, tb = b->frac_lo;
1008         if (ta == tb) {
1009             return float_relation_equal;
1010         }
1011     }
1012     return ta < tb ? float_relation_less : float_relation_greater;
1013 }
1014 
1015 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
1016 
/* Zero A's fraction. */
static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}
1021 
/* Zero both words of A's 128-bit fraction. */
static void frac128_clear(FloatParts128 *a)
{
    a->frac_lo = 0;
    a->frac_hi = 0;
}
1026 
1027 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
1028 
/*
 * Divide A's fraction by B's, leaving the quotient in a->frac.
 * Both fractions must be normalized (msb set).  The lsb of the
 * quotient is used as a sticky bit recording a non-zero remainder
 * (inexact).  Returns true when the caller must decrement the
 * exponent by one to compensate for the pre-shift below.
 */
static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}
1060 
/*
 * Divide A's 128-bit fraction by B's, leaving the quotient in *a.
 * Both fractions must be normalized (frac_hi msb set).  The lsb of
 * the result is used as a sticky bit recording a non-zero remainder
 * (inexact).  Returns true when the caller must decrement the
 * exponent by one, i.e. when A < B and no pre-shift was applied.
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        /* A >= B: pre-shift A right one so the quotient fits in N bits. */
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}
1107 
1108 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1109 
/* Return true if A's fraction is zero. */
static bool frac64_eqz(FloatParts64 *a)
{
    return !a->frac;
}
1114 
/* Return true if A's 128-bit fraction is zero. */
static bool frac128_eqz(FloatParts128 *a)
{
    return !(a->frac_hi | a->frac_lo);
}
1119 
1120 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1121 
/* Widening multiply: R = A * B as a full 128-bit product. */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    uint64_t fa = a->frac, fb = b->frac;

    mulu64(&r->frac_lo, &r->frac_hi, fa, fb);
}
1126 
/* Widening multiply: R = A * B as a full 256-bit product. */
static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    uint64_t a_hi = a->frac_hi, a_lo = a->frac_lo;
    uint64_t b_hi = b->frac_hi, b_lo = b->frac_lo;

    mul128To256(a_hi, a_lo, b_hi, b_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}
1132 
1133 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1134 
/* Two's-complement negate A's fraction. */
static void frac64_neg(FloatParts64 *a)
{
    a->frac = ~a->frac + 1;
}
1139 
/* Two's-complement negate A's 128-bit fraction: A = 0 - A. */
static void frac128_neg(FloatParts128 *a)
{
    bool borrow = 0;

    a->frac_lo = usub64_borrow(0, a->frac_lo, &borrow);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &borrow);
}
1146 
/* Two's-complement negate A's 256-bit fraction: A = 0 - A. */
static void frac256_neg(FloatParts256 *a)
{
    bool borrow = 0;

    /* Subtract each word from zero, propagating the borrow upward. */
    a->frac_lo = usub64_borrow(0, a->frac_lo, &borrow);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &borrow);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &borrow);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &borrow);
}
1155 
1156 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1157 
/*
 * Shift A's fraction left until its msb is set; return the shift
 * count, or 64 if the fraction was zero.
 */
static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac == 0) {
        return 64;
    }
    int shift = clz64(a->frac);
    a->frac <<= shift;
    return shift;
}
1167 
/*
 * Shift A's 128-bit fraction left until its msb is set; return the
 * shift count, or 128 if the fraction was zero.
 */
static int frac128_normalize(FloatParts128 *a)
{
    uint64_t hi = a->frac_hi, lo = a->frac_lo;

    if (hi != 0) {
        int n = clz64(hi);
        a->frac_hi = shl_double(hi, lo, n);
        a->frac_lo = lo << n;
        return n;
    }
    if (lo != 0) {
        /* High word empty: the low word supplies all remaining bits. */
        int n = clz64(lo);
        a->frac_hi = lo << n;
        a->frac_lo = 0;
        return n + 64;
    }
    return 128;
}
1183 
/*
 * Shift A's 256-bit fraction left until its msb is set.  Returns
 * the shift count, or 256 if the fraction was entirely zero.
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            /* Already normalized: msb of the top word is set. */
            return 0;
        }
        ret = shl;
    } else {
        /* Top word empty: skip whole zero words 64 bits at a time. */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    /* Apply the remaining sub-word shift (1..63 bits) across all words. */
    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}
1230 
1231 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1232 
/*
 * Reduce A's fraction modulo B's, used for the mod/rem operations.
 * If MOD_QUOT is non-null, store the low bits of the integer
 * quotient there and leave the truncated remainder in *a.
 * Otherwise adjust the remainder toward nearest (ties broken on the
 * quotient's low bit), possibly flipping A's sign.  On exit *a
 * holds the normalized remainder, or is marked float_class_zero.
 */
static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac;
    a1 = 0;

    if (exp_diff < -1) {
        /* |A| < |B|/2: the quotient is zero and A is already the result. */
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        /* B's exponent is one larger: pre-shift A right to align. */
        a0 >>= 1;
        exp_diff = 0;
    }

    b0 = b->frac;
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    /* Produce up to 62 quotient bits per iteration, underestimating
       the digit so the partial remainder stays non-negative. */
    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    /* Final partial digit; correct the underestimate by incrementing
       Q while the aligned divisor still fits in the remainder. */
    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        /* Remainder mode: if B - rem < rem, or they tie with an odd
           quotient, take the complement and flip the sign. */
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    /* Renormalize the remainder, or classify it as zero. */
    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac = a0 | (a1 != 0);
}
1315 
/*
 * 128-bit counterpart of frac64_modrem: reduce A's fraction modulo
 * B's.  If MOD_QUOT is non-null, store the low bits of the integer
 * quotient there and leave the truncated remainder in *a; otherwise
 * adjust the remainder toward nearest (ties broken on the quotient's
 * low bit), possibly flipping A's sign.  On exit *a holds the
 * normalized remainder, or is marked float_class_zero.
 */
static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    if (exp_diff < -1) {
        /* |A| < |B|/2: the quotient is zero and A is already the result. */
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        /* B's exponent is one larger: pre-shift A right to align. */
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    /* Produce up to 61 quotient bits per iteration, underestimating
       the digit so the partial remainder stays non-negative. */
    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    /* Final partial digit; correct the underestimate by incrementing
       Q while the aligned divisor still fits in the remainder. */
    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        /* Remainder mode: if B - rem < rem, or they tie with an odd
           quotient, take the complement and flip the sign. */
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    /* Renormalize the remainder, or classify it as zero. */
    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    a->frac_lo = a1 | (a2 != 0);
}
1410 
1411 #define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1412 
/* Shift A's fraction left by C bits. */
static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac = a->frac << c;
}
1417 
/* Shift A's 128-bit fraction left by C bits (0 <= C < 128). */
static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t hi = a->frac_hi, lo = a->frac_lo;

    if (c & 64) {
        /* Whole-word shift: the low word becomes the high word. */
        hi = lo;
        lo = 0;
    }

    c &= 63;
    if (c) {
        hi = shl_double(hi, lo, c);
        lo <<= c;
    }

    a->frac_hi = hi;
    a->frac_lo = lo;
}
1435 
1436 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1437 
/* Shift A's fraction right by C bits, discarding shifted-out bits. */
static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac = a->frac >> c;
}
1442 
/* Shift A's 128-bit fraction right by C bits (0 <= C < 128). */
static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t hi = a->frac_hi, lo = a->frac_lo;

    if (c & 64) {
        /* Whole-word shift: the high word becomes the low word. */
        lo = hi;
        hi = 0;
    }

    c &= 63;
    if (c) {
        lo = shr_double(hi, lo, c);
        hi >>= c;
    }

    a->frac_hi = hi;
    a->frac_lo = lo;
}
1460 
1461 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1462 
/*
 * Shift A's fraction right by C bits, ORing any bits shifted out
 * into the lsb of the result ("jamming" the sticky bit) so that
 * inexactness is not lost.
 */
static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t f = a->frac;

    if (c == 0) {
        return;
    }
    if (c < 64) {
        f = (f >> c) | (shr_double(f, 0, c) != 0);
    } else {
        /* Everything is shifted out; only the sticky bit remains. */
        f = (f != 0);
    }
    a->frac = f;
}
1476 
/*
 * Shift A's 128-bit fraction right by C bits, ORing any bits shifted
 * out into the lsb of the result ("jamming" the sticky bit) so that
 * inexactness is not lost.  C may be any non-negative count; shifts
 * of 128 or more collapse the fraction to just the sticky bit.
 */
static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        /* Whole-word shift: the low word goes entirely to sticky. */
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Everything is shifted out; only the sticky bit remains. */
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    /* Sub-word shift of 1..63 bits; collect the bits shifted out. */
    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}
1508 
/*
 * Shift A's 256-bit fraction right by C bits, ORing any bits shifted
 * out into the lsb of the result ("jamming" the sticky bit) so that
 * inexactness is not lost.  C may be any non-negative count; shifts
 * of 256 or more collapse the fraction to just the sticky bit.
 */
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        /* Whole-word shifts: displaced words go entirely to sticky. */
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Everything is shifted out; only the sticky bit remains. */
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    /* Sub-word shift of 1..63 bits; collect the bits shifted out. */
    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}
1550 
1551 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1552 
/* 64-bit fraction subtraction: R = A - B; returns the borrow out. */
static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    uint64_t diff;
    bool borrow = usub64_overflow(a->frac, b->frac, &diff);

    r->frac = diff;
    return borrow;
}
1557 
/* 128-bit fraction subtraction: R = A - B; returns the borrow out. */
static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool borrow = 0;

    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &borrow);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &borrow);
    return borrow;
}
1565 
/* 256-bit fraction subtraction: R = A - B; returns the borrow out. */
static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool borrow = 0;

    /* Propagate the borrow from the least to the most significant word. */
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &borrow);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &borrow);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &borrow);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &borrow);
    return borrow;
}
1575 
1576 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1577 
/* Narrow a 128-bit fraction to 64 bits, folding low bits into sticky. */
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo ? 1 : 0);
}
1582 
/* Narrow a 256-bit fraction to 128 bits, folding low bits into sticky. */
static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    uint64_t dropped = a->frac_lm | a->frac_lo;

    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | (dropped != 0);
}
1588 
1589 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1590 
/* Widen a 64-bit fraction to 128 bits, zero-filling the low word. */
static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_lo = 0;
    r->frac_hi = a->frac;
}
1596 
/* Widen a 128-bit fraction to 256 bits, zero-filling the low words. */
static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_lo = 0;
    r->frac_lm = 0;
    r->frac_hm = a->frac_lo;
    r->frac_hi = a->frac_hi;
}
1604 
1605 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1606 
/*
 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licensed.
 */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1630 
1631 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1632 #define FloatPartsN    glue(FloatParts,N)
1633 #define FloatPartsW    glue(FloatParts,W)
1634 
1635 #define N 64
1636 #define W 128
1637 
1638 #include "softfloat-parts-addsub.c.inc"
1639 #include "softfloat-parts.c.inc"
1640 
1641 #undef  N
1642 #undef  W
1643 #define N 128
1644 #define W 256
1645 
1646 #include "softfloat-parts-addsub.c.inc"
1647 #include "softfloat-parts.c.inc"
1648 
1649 #undef  N
1650 #undef  W
1651 #define N            256
1652 
1653 #include "softfloat-parts-addsub.c.inc"
1654 
1655 #undef  N
1656 #undef  W
1657 #undef  partsN
1658 #undef  FloatPartsN
1659 #undef  FloatPartsW
1660 
1661 /*
1662  * Pack/unpack routines with a specific FloatFmt.
1663  */
1664 
/*
 * Unpack the float16 F into canonical parts *P, using the supplied
 * PARAMS so that alternate half-precision layouts can share this path.
 */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}
1671 
/* Unpack F into canonical parts using the standard float16 format. */
static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}
1677 
/* Unpack raw bfloat16 @f into canonical parts @p. */
static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}
1684 
/* Round canonical parts @p per format @params and pack as raw float16. */
static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}
1692 
/* Round canonical parts @p and pack as IEEE binary16. */
static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}
1698 
/* Round canonical parts @p and pack as raw bfloat16. */
static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}
1705 
/* Unpack raw float32 @f into canonical parts @p. */
static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}
1712 
/* Round canonical parts @p and pack as raw float32. */
static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}
1719 
/* Unpack raw float64 @f into canonical parts @p. */
static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}
1726 
/* Round canonical parts @p and pack as raw float64. */
static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}
1733 
/*
 * Round canonical parts @p with float32 range and precision, then pack
 * the result in float64 format ("float64r32": a double-format result
 * that matches a single-precision rounding of the value).
 */
static float64 float64r32_round_pack_canonical(FloatParts64 *p,
                                               float_status *s)
{
    parts_uncanon(p, s, &float32_params);

    /*
     * In parts_uncanon, we placed the fraction for float32 at the lsb.
     * We need to adjust the fraction higher so that the least N bits are
     * zero, and the fraction is adjacent to the float64 implicit bit.
     */
    switch (p->cls) {
    case float_class_normal:
    case float_class_denormal:
        if (unlikely(p->exp == 0)) {
            /*
             * The result is denormal for float32, but can be represented
             * in normalized form for float64.  Adjust, per canonicalize.
             */
            int shift = frac_normalize(p);
            p->exp = (float32_params.frac_shift -
                      float32_params.exp_bias - shift + 1 +
                      float64_params.exp_bias);
            frac_shr(p, float64_params.frac_shift);
        } else {
            /* Move the fraction into place and re-bias the exponent. */
            frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
            p->exp += float64_params.exp_bias - float32_params.exp_bias;
        }
        break;
    case float_class_snan:
    case float_class_qnan:
        /* Keep the NaN payload aligned with the float64 fraction. */
        frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
        p->exp = float64_params.exp_max;
        break;
    case float_class_inf:
        p->exp = float64_params.exp_max;
        break;
    case float_class_zero:
        break;
    default:
        g_assert_not_reached();
    }

    return float64_pack_raw(p);
}
1778 
/* Unpack raw float128 @f into canonical parts @p. */
static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}
1785 
/* Round canonical parts @p and pack as raw float128. */
static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
1792 
/*
 * Unpack raw floatx80 @f into canonical parts @p.
 * Returns false if the encoding is invalid (the invalid exception is
 * raised and the caller should produce the default NaN).
 */
static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
                                      float_status *s)
{
    /* Ensure rounding precision is set before beginning. */
    switch (s->floatx80_rounding_precision) {
    case floatx80_precision_x:
    case floatx80_precision_d:
    case floatx80_precision_s:
        break;
    default:
        g_assert_not_reached();
    }

    if (unlikely(floatx80_invalid_encoding(f, s))) {
        float_raise(float_flag_invalid, s);
        return false;
    }

    floatx80_unpack_raw(p, f);

    if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
        /* Finite value: canonicalize as usual. */
        parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
    } else {
        /* The explicit integer bit is ignored, after invalid checks. */
        p->frac_hi &= MAKE_64BIT_MASK(0, 63);
        /* Classify Inf vs SNaN vs QNaN from the remaining fraction bits. */
        p->cls = (p->frac_hi == 0 ? float_class_inf
                  : parts_is_snan_frac(p->frac_hi, s)
                  ? float_class_snan : float_class_qnan);
    }
    return true;
}
1825 
/*
 * Round canonical parts @p at the currently selected floatx80 rounding
 * precision (extended, double, or single) and pack as a floatx80.
 */
static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
    uint64_t frac;
    int exp;

    switch (p->cls) {
    case float_class_normal:
    case float_class_denormal:
        if (s->floatx80_rounding_precision == floatx80_precision_x) {
            /* Full 64-bit significand: round in place. */
            parts_uncanon_normal(p, s, fmt);
            frac = p->frac_hi;
            exp = p->exp;
        } else {
            /*
             * Reduced precision: narrow to 64-bit parts first
             * (frac_truncjam preserves sticky information), then round.
             */
            FloatParts64 p64;

            p64.sign = p->sign;
            p64.exp = p->exp;
            frac_truncjam(&p64, p);
            parts_uncanon_normal(&p64, s, fmt);
            frac = p64.frac;
            exp = p64.exp;
        }
        if (exp != fmt->exp_max) {
            break;
        }
        /* rounded to inf -- fall through to set frac correctly */

    case float_class_inf:
        /* x86 and m68k differ in the setting of the integer bit. */
        frac = s->floatx80_behaviour & floatx80_default_inf_int_bit_is_zero ?
            0 : (1ULL << 63);
        exp = fmt->exp_max;
        break;

    case float_class_zero:
        frac = 0;
        exp = 0;
        break;

    case float_class_snan:
    case float_class_qnan:
        /* NaNs have the integer bit set. */
        frac = p->frac_hi | (1ull << 63);
        exp = fmt->exp_max;
        break;

    default:
        g_assert_not_reached();
    }

    return packFloatx80(p->sign, exp, frac);
}
1880 
1881 /*
1882  * Addition and subtraction
1883  */
1884 
/* Shared float16 add/sub implementation; @subtract selects subtraction. */
static float16 QEMU_FLATTEN
float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float16_round_pack_canonical(pr, status);
}
1896 
/* IEEE half-precision addition. */
float16 float16_add(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, false);
}
1901 
/* IEEE half-precision subtraction. */
float16 float16_sub(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, true);
}
1906 
/* Softfloat float32 add/sub; @subtract selects subtraction. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float32_round_pack_canonical(pr, status);
}
1918 
/* Softfloat fallback for float32 addition. */
static float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, false);
}
1923 
/* Softfloat fallback for float32 subtraction. */
static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, true);
}
1928 
/* Softfloat float64 add/sub; @subtract selects subtraction. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float64_round_pack_canonical(pr, status);
}
1940 
/* Softfloat fallback for float64 addition. */
static float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, false);
}
1945 
/* Softfloat fallback for float64 subtraction. */
static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, true);
}
1950 
/* Hardfloat fast path: native single-precision addition. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;
    return sum;
}
1955 
/* Hardfloat fast path: native single-precision subtraction. */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;
    return diff;
}
1960 
/* Hardfloat fast path: native double-precision addition. */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;
    return sum;
}
1965 
/* Hardfloat fast path: native double-precision subtraction. */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;
    return diff;
}
1970 
f32_addsubmul_post(union_float32 a,union_float32 b)1971 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1972 {
1973     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1974         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1975     }
1976     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1977 }
1978 
f64_addsubmul_post(union_float64 a,union_float64 b)1979 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1980 {
1981     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1982         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1983     } else {
1984         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1985     }
1986 }
1987 
/* Dispatch float32 add/sub through the generic hard/soft selector. */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}
1994 
/* Dispatch float64 add/sub through the generic hard/soft selector. */
static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}
2001 
/* IEEE single-precision addition (hardfloat-accelerated). */
float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}
2007 
/* IEEE single-precision subtraction (hardfloat-accelerated). */
float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}
2013 
/* IEEE double-precision addition (hardfloat-accelerated). */
float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}
2019 
/* IEEE double-precision subtraction (hardfloat-accelerated). */
float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
2025 
/* float64 add/sub rounded to float32 precision; @subtract selects sub. */
static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
                                 bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float64r32_round_pack_canonical(pr, status);
}
2037 
/* Addition with float64 operands but float32 rounding of the result. */
float64 float64r32_add(float64 a, float64 b, float_status *status)
{
    return float64r32_addsub(a, b, status, false);
}
2042 
/* Subtraction with float64 operands but float32 rounding of the result. */
float64 float64r32_sub(float64 a, float64 b, float_status *status)
{
    return float64r32_addsub(a, b, status, true);
}
2047 
/* Shared bfloat16 add/sub implementation; @subtract selects subtraction. */
static bfloat16 QEMU_FLATTEN
bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return bfloat16_round_pack_canonical(pr, status);
}
2059 
/* bfloat16 addition. */
bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, false);
}
2064 
/* bfloat16 subtraction. */
bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, true);
}
2069 
/* Shared float128 add/sub implementation; @subtract selects subtraction. */
static float128 QEMU_FLATTEN
float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float128_round_pack_canonical(pr, status);
}
2081 
/* IEEE quad-precision addition. */
float128 float128_add(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, false);
}
2086 
/* IEEE quad-precision subtraction. */
float128 float128_sub(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, true);
}
2091 
/*
 * Shared floatx80 add/sub implementation; @subtract selects subtraction.
 * Invalid encodings yield the default NaN (invalid already raised).
 */
static floatx80 QEMU_FLATTEN
floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }

    pr = parts_addsub(&pa, &pb, status, subtract);
    return floatx80_round_pack_canonical(pr, status);
}
2105 
/* 80-bit extended-precision addition. */
floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
{
    return floatx80_addsub(a, b, status, false);
}
2110 
/* 80-bit extended-precision subtraction. */
floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
{
    return floatx80_addsub(a, b, status, true);
}
2115 
2116 /*
2117  * Multiplication
2118  */
2119 
/* IEEE half-precision multiplication. */
float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}
2130 
/* Softfloat fallback for float32 multiplication. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}
2142 
/* Softfloat fallback for float64 multiplication. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}
2154 
/* Hardfloat fast path: native single-precision multiplication. */
static float hard_f32_mul(float a, float b)
{
    float product = a * b;
    return product;
}
2159 
/* Hardfloat fast path: native double-precision multiplication. */
static double hard_f64_mul(double a, double b)
{
    double product = a * b;
    return product;
}
2164 
/* IEEE single-precision multiplication (hardfloat-accelerated). */
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}
2171 
/* IEEE double-precision multiplication (hardfloat-accelerated). */
float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}
2178 
/* Multiplication with float64 operands but float32 rounding of the result. */
float64 float64r32_mul(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float64r32_round_pack_canonical(pr, status);
}
2189 
/* bfloat16 multiplication. */
bfloat16 QEMU_FLATTEN
bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}
2201 
/* IEEE quad-precision multiplication. */
float128 QEMU_FLATTEN
float128_mul(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}
2213 
/*
 * 80-bit extended-precision multiplication.
 * Invalid encodings yield the default NaN (invalid already raised).
 */
floatx80 QEMU_FLATTEN
floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }

    pr = parts_mul(&pa, &pb, status);
    return floatx80_round_pack_canonical(pr, status);
}
2227 
2228 /*
2229  * Fused multiply-add
2230  */
2231 
/*
 * Fused multiply-add for float16: (a * b) + c, modified by @flags,
 * with the intermediate scaled by 2**@scale before rounding.
 */
float16 QEMU_FLATTEN
float16_muladd_scalbn(float16 a, float16 b, float16 c,
                      int scale, int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    float16_unpack_canonical(&pc, c, status);
    pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);

    return float16_round_pack_canonical(pr, status);
}
2245 
/* Fused multiply-add for float16 without scaling. */
float16 float16_muladd(float16 a, float16 b, float16 c,
                       int flags, float_status *status)
{
    return float16_muladd_scalbn(a, b, c, 0, flags, status);
}
2251 
/*
 * Fused multiply-add for float32: (a * b) + c, modified by @flags,
 * with the intermediate scaled by 2**@scale before rounding.
 */
float32 QEMU_SOFTFLOAT_ATTR
float32_muladd_scalbn(float32 a, float32 b, float32 c,
                      int scale, int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    float32_unpack_canonical(&pc, c, status);
    pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);

    return float32_round_pack_canonical(pr, status);
}
2265 
/*
 * Fused multiply-add for float64: (a * b) + c, modified by @flags,
 * with the intermediate scaled by 2**@scale before rounding.
 */
float64 QEMU_SOFTFLOAT_ATTR
float64_muladd_scalbn(float64 a, float64 b, float64 c,
                      int scale, int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);

    return float64_round_pack_canonical(pr, status);
}
2279 
2280 static bool force_soft_fma;
2281 
/*
 * Fused multiply-add for float32 with a hardfloat fast path: use the
 * host's fmaf() when status and operands allow, otherwise fall back to
 * the softfloat implementation.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* This flag's product-zero semantics are only handled by softfloat. */
    if (unlikely(flags & float_muladd_suppress_add_product_zero)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        /* Some input is not zero-or-normal; softfloat handles specials. */
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* Materialize the correctly-signed zero product explicitly. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Keep originals in case we must retry in softfloat. */
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Possible underflow: redo in softfloat for exact flags. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
}
2352 
/*
 * Fused multiply-add for float64 with a hardfloat fast path: use the
 * host's fma() when status and operands allow, otherwise fall back to
 * the softfloat implementation.
 */
float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        /* Some input is not zero-or-normal; softfloat handles specials. */
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        /* Materialize the correctly-signed zero product explicitly. */
        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Keep originals in case we must retry in softfloat. */
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
            /*
             * Possible underflow: redo in softfloat for exact flags.
             * NOTE(review): the bound is FLT_MIN, not DBL_MIN, so this
             * is very conservative for a double result; behavior stays
             * correct (the soft path recomputes) but confirm whether
             * the narrower bound is intentional.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
}
2420 
/* Fused multiply-add on float64 operands with float32 rounding. */
float64 float64r32_muladd(float64 a, float64 b, float64 c,
                          int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);

    return float64r32_round_pack_canonical(pr, status);
}
2433 
/* Fused multiply-add for bfloat16. */
bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
                                      int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    bfloat16_unpack_canonical(&pc, c, status);
    pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);

    return bfloat16_round_pack_canonical(pr, status);
}
2446 
/* Fused multiply-add for float128. */
float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
                                      int flags, float_status *status)
{
    FloatParts128 pa, pb, pc, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    float128_unpack_canonical(&pc, c, status);
    pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);

    return float128_round_pack_canonical(pr, status);
}
2459 
2460 /*
2461  * Division
2462  */
2463 
/* IEEE half-precision division. */
float16 float16_div(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}
2474 
/* Softfloat fallback for float32 division. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_div(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}
2486 
/* Softfloat fallback for float64 division. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_div(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}
2498 
/* Hardfloat fast path: native single-precision division. */
static float hard_f32_div(float a, float b)
{
    float quotient = a / b;
    return quotient;
}
2503 
/* Hardfloat fast path: native double-precision division. */
static double hard_f64_div(double a, double b)
{
    double quotient = a / b;
    return quotient;
}
2508 
f32_div_pre(union_float32 a,union_float32 b)2509 static bool f32_div_pre(union_float32 a, union_float32 b)
2510 {
2511     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2512         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2513                fpclassify(b.h) == FP_NORMAL;
2514     }
2515     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2516 }
2517 
f64_div_pre(union_float64 a,union_float64 b)2518 static bool f64_div_pre(union_float64 a, union_float64 b)
2519 {
2520     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2521         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2522                fpclassify(b.h) == FP_NORMAL;
2523     }
2524     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2525 }
2526 
f32_div_post(union_float32 a,union_float32 b)2527 static bool f32_div_post(union_float32 a, union_float32 b)
2528 {
2529     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2530         return fpclassify(a.h) != FP_ZERO;
2531     }
2532     return !float32_is_zero(a.s);
2533 }
2534 
f64_div_post(union_float64 a,union_float64 b)2535 static bool f64_div_post(union_float64 a, union_float64 b)
2536 {
2537     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2538         return fpclassify(a.h) != FP_ZERO;
2539     }
2540     return !float64_is_zero(a.s);
2541 }
2542 
2543 float32 QEMU_FLATTEN
float32_div(float32 a,float32 b,float_status * s)2544 float32_div(float32 a, float32 b, float_status *s)
2545 {
2546     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2547                         f32_div_pre, f32_div_post);
2548 }
2549 
2550 float64 QEMU_FLATTEN
float64_div(float64 a,float64 b,float_status * s)2551 float64_div(float64 a, float64 b, float_status *s)
2552 {
2553     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2554                         f64_div_pre, f64_div_post);
2555 }
2556 
float64 float64r32_div(float64 a, float64 b, float_status *status)
{
    /* Double-precision divide whose result is rounded as float32. */
    FloatParts64 num, den;
    FloatParts64 *quot;

    float64_unpack_canonical(&num, a, status);
    float64_unpack_canonical(&den, b, status);
    quot = parts_div(&num, &den, status);
    return float64r32_round_pack_canonical(quot, status);
}
2567 
2568 bfloat16 QEMU_FLATTEN
bfloat16_div(bfloat16 a,bfloat16 b,float_status * status)2569 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2570 {
2571     FloatParts64 pa, pb, *pr;
2572 
2573     bfloat16_unpack_canonical(&pa, a, status);
2574     bfloat16_unpack_canonical(&pb, b, status);
2575     pr = parts_div(&pa, &pb, status);
2576 
2577     return bfloat16_round_pack_canonical(pr, status);
2578 }
2579 
2580 float128 QEMU_FLATTEN
float128_div(float128 a,float128 b,float_status * status)2581 float128_div(float128 a, float128 b, float_status *status)
2582 {
2583     FloatParts128 pa, pb, *pr;
2584 
2585     float128_unpack_canonical(&pa, a, status);
2586     float128_unpack_canonical(&pb, b, status);
2587     pr = parts_div(&pa, &pb, status);
2588 
2589     return float128_round_pack_canonical(pr, status);
2590 }
2591 
floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    FloatParts128 num, den;
    FloatParts128 *quot;

    /* floatx80 has invalid encodings; either operand failing to
       unpack yields the default NaN. */
    if (!floatx80_unpack_canonical(&num, a, status) ||
        !floatx80_unpack_canonical(&den, b, status)) {
        return floatx80_default_nan(status);
    }

    quot = parts_div(&num, &den, status);
    return floatx80_round_pack_canonical(quot, status);
}
2604 
2605 /*
2606  * Remainder
2607  */
2608 
float32 float32_rem(float32 a, float32 b, float_status *status)
{
    /* IEEE remainder (quotient pointer NULL selects that behavior). */
    FloatParts64 num, den;
    FloatParts64 *rem;

    float32_unpack_canonical(&num, a, status);
    float32_unpack_canonical(&den, b, status);
    rem = parts_modrem(&num, &den, NULL, status);
    return float32_round_pack_canonical(rem, status);
}
2619 
float64 float64_rem(float64 a, float64 b, float_status *status)
{
    /* IEEE remainder (quotient pointer NULL selects that behavior). */
    FloatParts64 num, den;
    FloatParts64 *rem;

    float64_unpack_canonical(&num, a, status);
    float64_unpack_canonical(&den, b, status);
    rem = parts_modrem(&num, &den, NULL, status);
    return float64_round_pack_canonical(rem, status);
}
2630 
float128 float128_rem(float128 a, float128 b, float_status *status)
{
    /* IEEE remainder (quotient pointer NULL selects that behavior). */
    FloatParts128 num, den;
    FloatParts128 *rem;

    float128_unpack_canonical(&num, a, status);
    float128_unpack_canonical(&den, b, status);
    rem = parts_modrem(&num, &den, NULL, status);
    return float128_round_pack_canonical(rem, status);
}
2641 
2642 /*
2643  * Returns the remainder of the extended double-precision floating-point value
2644  * `a' with respect to the corresponding value `b'.
2645  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2646  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2647  * the remainder based on truncating the quotient toward zero instead and
2648  * *quotient is set to the low 64 bits of the absolute value of the integer
2649  * quotient.
2650  */
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
                         uint64_t *quotient, float_status *status)
{
    FloatParts128 num, den;
    FloatParts128 *res;

    *quotient = 0;

    /* Invalid floatx80 encodings yield the default NaN. */
    if (!floatx80_unpack_canonical(&num, a, status) ||
        !floatx80_unpack_canonical(&den, b, status)) {
        return floatx80_default_nan(status);
    }

    /* Passing the quotient pointer selects truncating (mod) semantics;
       NULL selects IEEE remainder. */
    res = parts_modrem(&num, &den, mod ? quotient : NULL, status);
    return floatx80_round_pack_canonical(res, status);
}
2665 
floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
{
    /* IEEE remainder; the integer quotient is not needed. */
    uint64_t discard;

    return floatx80_modrem(a, b, false, &discard, status);
}
2671 
floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
{
    /* Truncating remainder; the integer quotient is not needed. */
    uint64_t discard;

    return floatx80_modrem(a, b, true, &discard, status);
}
2677 
2678 /*
2679  * Float to Float conversions
2680  *
2681  * Returns the result of converting one float format to another. The
2682  * conversion is performed according to the IEC/IEEE Standard for
2683  * Binary Floating-Point Arithmetic.
2684  *
2685  * Usually this only needs to take care of raising invalid exceptions
2686  * and handling the conversion on NaNs.
2687  */
2688 
/*
 * Adjust unpacked parts for conversion to the Arm Alternative Half
 * Precision (AHP) format, which encodes neither NaN nor Inf: NaNs
 * become a signed zero and infinities become the maximum normal,
 * both raising Invalid.
 */
static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
        float_raise(float_flag_invalid_snan, s);
        /* fall through */
    case float_class_qnan:
        /*
         * There is no NaN in the destination format.  Raise Invalid
         * and return a zero with the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        break;

    case float_class_inf:
        /*
         * There is no Inf in the destination format.  Raise Invalid
         * and return the maximum normal with the correct sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        /* All fraction bits set, including the implicit bit. */
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        break;

    case float_class_denormal:
        /* Record that a denormal input was consumed as-is. */
        float_raise(float_flag_input_denormal_used, s);
        break;
    case float_class_normal:
    case float_class_zero:
        break;

    default:
        g_assert_not_reached();
    }
}
2727 
static void parts64_float_to_float(FloatParts64 *a, float_status *s)
{
    /*
     * Same-size conversion: canonicalize NaNs, and flag denormal
     * inputs as used.  The classes are mutually exclusive, so an
     * else-if chain is equivalent to the two separate tests.
     */
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    } else if (a->cls == float_class_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }
}
2737 
static void parts128_float_to_float(FloatParts128 *a, float_status *s)
{
    /*
     * Same-size conversion: canonicalize NaNs, and flag denormal
     * inputs as used.  The classes are mutually exclusive, so an
     * else-if chain is equivalent to the two separate tests.
     */
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    } else if (a->cls == float_class_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }
}
2747 
/* Dispatch to parts64_/parts128_float_to_float via PARTS_GENERIC_64_128. */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2750 
/*
 * Narrow 128-bit parts into 64-bit parts.  Class, sign and exponent
 * copy across directly; the fraction is reduced with frac_truncjam
 * (presumably preserving discarded bits as a sticky bit for later
 * rounding -- see its definition).  NaN payloads keep only the high
 * 64 fraction bits before canonicalization.
 */
static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    switch (a->cls) {
    case float_class_denormal:
        /* Record that a denormal input was consumed. */
        float_raise(float_flag_input_denormal_used, s);
        /* fall through */
    case float_class_normal:
        frac_truncjam(a, b);
        break;
    case float_class_snan:
    case float_class_qnan:
        /* Discard the low bits of the NaN. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
        break;
    default:
        /* zero and inf need no fraction handling. */
        break;
    }
}
2775 
static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
                                       float_status *s)
{
    /* Copy class/sign/exponent across and widen the fraction, then
       canonicalize NaNs and flag denormal inputs as used. */
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;
    frac_widen(a, b);

    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    } else if (a->cls == float_class_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }
}
2791 
float16_to_float32(float16 a,bool ieee,float_status * s)2792 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2793 {
2794     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2795     FloatParts64 p;
2796 
2797     float16a_unpack_canonical(&p, a, s, fmt16);
2798     parts_float_to_float(&p, s);
2799     return float32_round_pack_canonical(&p, s);
2800 }
2801 
float16_to_float64(float16 a,bool ieee,float_status * s)2802 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2803 {
2804     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2805     FloatParts64 p;
2806 
2807     float16a_unpack_canonical(&p, a, s, fmt16);
2808     parts_float_to_float(&p, s);
2809     return float64_round_pack_canonical(&p, s);
2810 }
2811 
float32_to_float16(float32 a,bool ieee,float_status * s)2812 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2813 {
2814     FloatParts64 p;
2815     const FloatFmt *fmt;
2816 
2817     float32_unpack_canonical(&p, a, s);
2818     if (ieee) {
2819         parts_float_to_float(&p, s);
2820         fmt = &float16_params;
2821     } else {
2822         parts_float_to_ahp(&p, s);
2823         fmt = &float16_params_ahp;
2824     }
2825     return float16a_round_pack_canonical(&p, s, fmt);
2826 }
2827 
2828 static float64 QEMU_SOFTFLOAT_ATTR
soft_float32_to_float64(float32 a,float_status * s)2829 soft_float32_to_float64(float32 a, float_status *s)
2830 {
2831     FloatParts64 p;
2832 
2833     float32_unpack_canonical(&p, a, s);
2834     parts_float_to_float(&p, s);
2835     return float64_round_pack_canonical(&p, s);
2836 }
2837 
float32_to_float64(float32 a,float_status * s)2838 float64 float32_to_float64(float32 a, float_status *s)
2839 {
2840     if (likely(float32_is_normal(a))) {
2841         /* Widening conversion can never produce inexact results.  */
2842         union_float32 uf;
2843         union_float64 ud;
2844         uf.s = a;
2845         ud.h = uf.h;
2846         return ud.s;
2847     } else if (float32_is_zero(a)) {
2848         return float64_set_sign(float64_zero, float32_is_neg(a));
2849     } else {
2850         return soft_float32_to_float64(a, s);
2851     }
2852 }
2853 
float64_to_float16(float64 a,bool ieee,float_status * s)2854 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2855 {
2856     FloatParts64 p;
2857     const FloatFmt *fmt;
2858 
2859     float64_unpack_canonical(&p, a, s);
2860     if (ieee) {
2861         parts_float_to_float(&p, s);
2862         fmt = &float16_params;
2863     } else {
2864         parts_float_to_ahp(&p, s);
2865         fmt = &float16_params_ahp;
2866     }
2867     return float16a_round_pack_canonical(&p, s, fmt);
2868 }
2869 
float64_to_float32(float64 a,float_status * s)2870 float32 float64_to_float32(float64 a, float_status *s)
2871 {
2872     FloatParts64 p;
2873 
2874     float64_unpack_canonical(&p, a, s);
2875     parts_float_to_float(&p, s);
2876     return float32_round_pack_canonical(&p, s);
2877 }
2878 
bfloat16_to_float32(bfloat16 a,float_status * s)2879 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2880 {
2881     FloatParts64 p;
2882 
2883     bfloat16_unpack_canonical(&p, a, s);
2884     parts_float_to_float(&p, s);
2885     return float32_round_pack_canonical(&p, s);
2886 }
2887 
bfloat16_to_float64(bfloat16 a,float_status * s)2888 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2889 {
2890     FloatParts64 p;
2891 
2892     bfloat16_unpack_canonical(&p, a, s);
2893     parts_float_to_float(&p, s);
2894     return float64_round_pack_canonical(&p, s);
2895 }
2896 
float32_to_bfloat16(float32 a,float_status * s)2897 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2898 {
2899     FloatParts64 p;
2900 
2901     float32_unpack_canonical(&p, a, s);
2902     parts_float_to_float(&p, s);
2903     return bfloat16_round_pack_canonical(&p, s);
2904 }
2905 
float64_to_bfloat16(float64 a,float_status * s)2906 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2907 {
2908     FloatParts64 p;
2909 
2910     float64_unpack_canonical(&p, a, s);
2911     parts_float_to_float(&p, s);
2912     return bfloat16_round_pack_canonical(&p, s);
2913 }
2914 
float128_to_float32(float128 a,float_status * s)2915 float32 float128_to_float32(float128 a, float_status *s)
2916 {
2917     FloatParts64 p64;
2918     FloatParts128 p128;
2919 
2920     float128_unpack_canonical(&p128, a, s);
2921     parts_float_to_float_narrow(&p64, &p128, s);
2922     return float32_round_pack_canonical(&p64, s);
2923 }
2924 
float128_to_float64(float128 a,float_status * s)2925 float64 float128_to_float64(float128 a, float_status *s)
2926 {
2927     FloatParts64 p64;
2928     FloatParts128 p128;
2929 
2930     float128_unpack_canonical(&p128, a, s);
2931     parts_float_to_float_narrow(&p64, &p128, s);
2932     return float64_round_pack_canonical(&p64, s);
2933 }
2934 
float32_to_float128(float32 a,float_status * s)2935 float128 float32_to_float128(float32 a, float_status *s)
2936 {
2937     FloatParts64 p64;
2938     FloatParts128 p128;
2939 
2940     float32_unpack_canonical(&p64, a, s);
2941     parts_float_to_float_widen(&p128, &p64, s);
2942     return float128_round_pack_canonical(&p128, s);
2943 }
2944 
float64_to_float128(float64 a,float_status * s)2945 float128 float64_to_float128(float64 a, float_status *s)
2946 {
2947     FloatParts64 p64;
2948     FloatParts128 p128;
2949 
2950     float64_unpack_canonical(&p64, a, s);
2951     parts_float_to_float_widen(&p128, &p64, s);
2952     return float128_round_pack_canonical(&p128, s);
2953 }
2954 
floatx80_to_float32(floatx80 a,float_status * s)2955 float32 floatx80_to_float32(floatx80 a, float_status *s)
2956 {
2957     FloatParts64 p64;
2958     FloatParts128 p128;
2959 
2960     if (floatx80_unpack_canonical(&p128, a, s)) {
2961         parts_float_to_float_narrow(&p64, &p128, s);
2962     } else {
2963         parts_default_nan(&p64, s);
2964     }
2965     return float32_round_pack_canonical(&p64, s);
2966 }
2967 
floatx80_to_float64(floatx80 a,float_status * s)2968 float64 floatx80_to_float64(floatx80 a, float_status *s)
2969 {
2970     FloatParts64 p64;
2971     FloatParts128 p128;
2972 
2973     if (floatx80_unpack_canonical(&p128, a, s)) {
2974         parts_float_to_float_narrow(&p64, &p128, s);
2975     } else {
2976         parts_default_nan(&p64, s);
2977     }
2978     return float64_round_pack_canonical(&p64, s);
2979 }
2980 
floatx80_to_float128(floatx80 a,float_status * s)2981 float128 floatx80_to_float128(floatx80 a, float_status *s)
2982 {
2983     FloatParts128 p;
2984 
2985     if (floatx80_unpack_canonical(&p, a, s)) {
2986         parts_float_to_float(&p, s);
2987     } else {
2988         parts_default_nan(&p, s);
2989     }
2990     return float128_round_pack_canonical(&p, s);
2991 }
2992 
float32_to_floatx80(float32 a,float_status * s)2993 floatx80 float32_to_floatx80(float32 a, float_status *s)
2994 {
2995     FloatParts64 p64;
2996     FloatParts128 p128;
2997 
2998     float32_unpack_canonical(&p64, a, s);
2999     parts_float_to_float_widen(&p128, &p64, s);
3000     return floatx80_round_pack_canonical(&p128, s);
3001 }
3002 
float64_to_floatx80(float64 a,float_status * s)3003 floatx80 float64_to_floatx80(float64 a, float_status *s)
3004 {
3005     FloatParts64 p64;
3006     FloatParts128 p128;
3007 
3008     float64_unpack_canonical(&p64, a, s);
3009     parts_float_to_float_widen(&p128, &p64, s);
3010     return floatx80_round_pack_canonical(&p128, s);
3011 }
3012 
float128_to_floatx80(float128 a,float_status * s)3013 floatx80 float128_to_floatx80(float128 a, float_status *s)
3014 {
3015     FloatParts128 p;
3016 
3017     float128_unpack_canonical(&p, a, s);
3018     parts_float_to_float(&p, s);
3019     return floatx80_round_pack_canonical(&p, s);
3020 }
3021 
3022 /*
3023  * Round to integral value
3024  */
3025 
float16 float16_round_to_int(float16 a, float_status *s)
{
    /* Round to an integral value, keeping the float16 format. */
    FloatParts64 parts;

    float16_unpack_canonical(&parts, a, s);
    parts_round_to_int(&parts, s->float_rounding_mode, 0, s, &float16_params);
    return float16_round_pack_canonical(&parts, s);
}
3034 
float32 float32_round_to_int(float32 a, float_status *s)
{
    /* Round to an integral value, keeping the float32 format. */
    FloatParts64 parts;

    float32_unpack_canonical(&parts, a, s);
    parts_round_to_int(&parts, s->float_rounding_mode, 0, s, &float32_params);
    return float32_round_pack_canonical(&parts, s);
}
3043 
float64 float64_round_to_int(float64 a, float_status *s)
{
    /* Round to an integral value, keeping the float64 format. */
    FloatParts64 parts;

    float64_unpack_canonical(&parts, a, s);
    parts_round_to_int(&parts, s->float_rounding_mode, 0, s, &float64_params);
    return float64_round_pack_canonical(&parts, s);
}
3052 
bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
    /* Round to an integral value, keeping the bfloat16 format. */
    FloatParts64 parts;

    bfloat16_unpack_canonical(&parts, a, s);
    parts_round_to_int(&parts, s->float_rounding_mode, 0, s, &bfloat16_params);
    return bfloat16_round_pack_canonical(&parts, s);
}
3061 
float128 float128_round_to_int(float128 a, float_status *s)
{
    /* Round to an integral value, keeping the float128 format. */
    FloatParts128 parts;

    float128_unpack_canonical(&parts, a, s);
    parts_round_to_int(&parts, s->float_rounding_mode, 0, s, &float128_params);
    return float128_round_pack_canonical(&parts, s);
}
3070 
floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
{
    FloatParts128 parts;

    /* Invalid floatx80 encodings yield the default NaN. */
    if (!floatx80_unpack_canonical(&parts, a, status)) {
        return floatx80_default_nan(status);
    }

    /* The precision in effect selects the parameter set. */
    parts_round_to_int(&parts, status->float_rounding_mode, 0, status,
                       &floatx80_params[status->floatx80_rounding_precision]);
    return floatx80_round_pack_canonical(&parts, status);
}
3083 
3084 /*
3085  * Floating-point to signed integer conversions
3086  */
3087 
float16_to_int8_scalbn(float16 a,FloatRoundMode rmode,int scale,float_status * s)3088 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3089                               float_status *s)
3090 {
3091     FloatParts64 p;
3092 
3093     float16_unpack_canonical(&p, a, s);
3094     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3095 }
3096 
float16_to_int16_scalbn(float16 a,FloatRoundMode rmode,int scale,float_status * s)3097 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3098                                 float_status *s)
3099 {
3100     FloatParts64 p;
3101 
3102     float16_unpack_canonical(&p, a, s);
3103     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3104 }
3105 
float16_to_int32_scalbn(float16 a,FloatRoundMode rmode,int scale,float_status * s)3106 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3107                                 float_status *s)
3108 {
3109     FloatParts64 p;
3110 
3111     float16_unpack_canonical(&p, a, s);
3112     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3113 }
3114 
float16_to_int64_scalbn(float16 a,FloatRoundMode rmode,int scale,float_status * s)3115 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3116                                 float_status *s)
3117 {
3118     FloatParts64 p;
3119 
3120     float16_unpack_canonical(&p, a, s);
3121     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3122 }
3123 
float32_to_int16_scalbn(float32 a,FloatRoundMode rmode,int scale,float_status * s)3124 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3125                                 float_status *s)
3126 {
3127     FloatParts64 p;
3128 
3129     float32_unpack_canonical(&p, a, s);
3130     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3131 }
3132 
float32_to_int32_scalbn(float32 a,FloatRoundMode rmode,int scale,float_status * s)3133 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3134                                 float_status *s)
3135 {
3136     FloatParts64 p;
3137 
3138     float32_unpack_canonical(&p, a, s);
3139     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3140 }
3141 
float32_to_int64_scalbn(float32 a,FloatRoundMode rmode,int scale,float_status * s)3142 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3143                                 float_status *s)
3144 {
3145     FloatParts64 p;
3146 
3147     float32_unpack_canonical(&p, a, s);
3148     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3149 }
3150 
float64_to_int16_scalbn(float64 a,FloatRoundMode rmode,int scale,float_status * s)3151 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3152                                 float_status *s)
3153 {
3154     FloatParts64 p;
3155 
3156     float64_unpack_canonical(&p, a, s);
3157     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3158 }
3159 
float64_to_int32_scalbn(float64 a,FloatRoundMode rmode,int scale,float_status * s)3160 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3161                                 float_status *s)
3162 {
3163     FloatParts64 p;
3164 
3165     float64_unpack_canonical(&p, a, s);
3166     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3167 }
3168 
float64_to_int64_scalbn(float64 a,FloatRoundMode rmode,int scale,float_status * s)3169 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3170                                 float_status *s)
3171 {
3172     FloatParts64 p;
3173 
3174     float64_unpack_canonical(&p, a, s);
3175     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3176 }
3177 
bfloat16_to_int8_scalbn(bfloat16 a,FloatRoundMode rmode,int scale,float_status * s)3178 int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3179                                float_status *s)
3180 {
3181     FloatParts64 p;
3182 
3183     bfloat16_unpack_canonical(&p, a, s);
3184     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3185 }
3186 
bfloat16_to_int16_scalbn(bfloat16 a,FloatRoundMode rmode,int scale,float_status * s)3187 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3188                                  float_status *s)
3189 {
3190     FloatParts64 p;
3191 
3192     bfloat16_unpack_canonical(&p, a, s);
3193     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3194 }
3195 
bfloat16_to_int32_scalbn(bfloat16 a,FloatRoundMode rmode,int scale,float_status * s)3196 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3197                                  float_status *s)
3198 {
3199     FloatParts64 p;
3200 
3201     bfloat16_unpack_canonical(&p, a, s);
3202     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3203 }
3204 
bfloat16_to_int64_scalbn(bfloat16 a,FloatRoundMode rmode,int scale,float_status * s)3205 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3206                                  float_status *s)
3207 {
3208     FloatParts64 p;
3209 
3210     bfloat16_unpack_canonical(&p, a, s);
3211     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3212 }
3213 
float128_to_int32_scalbn(float128 a,FloatRoundMode rmode,int scale,float_status * s)3214 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3215                                         int scale, float_status *s)
3216 {
3217     FloatParts128 p;
3218 
3219     float128_unpack_canonical(&p, a, s);
3220     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3221 }
3222 
float128_to_int64_scalbn(float128 a,FloatRoundMode rmode,int scale,float_status * s)3223 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3224                                         int scale, float_status *s)
3225 {
3226     FloatParts128 p;
3227 
3228     float128_unpack_canonical(&p, a, s);
3229     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3230 }
3231 
/*
 * Convert a float128 to a signed 128-bit integer with an explicit
 * rounding mode and exponent scale.  NaN, Inf and out-of-range values
 * raise Invalid; inexact roundings raise Inexact.
 */
static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    int flags = 0;
    Int128 r;
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);

    switch (p.cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        /* NaN inputs: raise Invalid and return all bits set. */
        flags |= float_flag_invalid;
        r = UINT128_MAX;
        break;

    case float_class_inf:
        /* Inf saturates toward the signed extreme. */
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p.sign ? INT128_MIN : INT128_MAX;
        break;

    case float_class_zero:
        return int128_zero();

    case float_class_normal:
    case float_class_denormal:
        /* A true return from the rounding step marks the result inexact. */
        if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
            flags = float_flag_inexact;
        }

        if (p.exp < 127) {
            /* In range: shift the fraction down into integer position. */
            int shift = 127 - p.exp;
            r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
            if (p.sign) {
                r = int128_neg(r);
            }
        } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
                   p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
            /* Exactly INT128_MIN, the one magnitude-2**127 value that fits. */
            r = INT128_MIN;
        } else {
            /* Magnitude too large: saturate and raise Invalid. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = p.sign ? INT128_MIN : INT128_MAX;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}
3286 
floatx80_to_int32_scalbn(floatx80 a,FloatRoundMode rmode,int scale,float_status * s)3287 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3288                                         int scale, float_status *s)
3289 {
3290     FloatParts128 p;
3291 
3292     if (!floatx80_unpack_canonical(&p, a, s)) {
3293         parts_default_nan(&p, s);
3294     }
3295     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3296 }
3297 
floatx80_to_int64_scalbn(floatx80 a,FloatRoundMode rmode,int scale,float_status * s)3298 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3299                                         int scale, float_status *s)
3300 {
3301     FloatParts128 p;
3302 
3303     if (!floatx80_unpack_canonical(&p, a, s)) {
3304         parts_default_nan(&p, s);
3305     }
3306     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3307 }
3308 
int8_t float16_to_int8(float16 a, float_status *s)
{
    /* Convert using the rounding mode from *s, with no exponent scaling. */
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}
3313 
int16_t float16_to_int16(float16 a, float_status *s)
{
    /* Convert using the rounding mode from *s, with no exponent scaling. */
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}
3318 
int32_t float16_to_int32(float16 a, float_status *s)
{
    /* Convert using the rounding mode from *s, with no exponent scaling. */
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}
3323 
int64_t float16_to_int64(float16 a, float_status *s)
{
    /* Convert using the rounding mode from *s, with no exponent scaling. */
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}
3328 
int16_t float32_to_int16(float32 a, float_status *s)
{
    /* Convert using the rounding mode from *s, with no exponent scaling. */
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}
3333 
int32_t float32_to_int32(float32 a, float_status *s)
{
    /* Convert using the rounding mode from *s, with no exponent scaling. */
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}
3338 
int64_t float32_to_int64(float32 a, float_status *s)
{
    /* Convert using the rounding mode from *s, with no exponent scaling. */
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}
3343 
int16_t float64_to_int16(float64 a, float_status *s)
{
    /* Convert using the rounding mode from *s, with no exponent scaling. */
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}
3348 
int32_t float64_to_int32(float64 a, float_status *s)
{
    /* Convert using the rounding mode from *s, with no exponent scaling. */
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}
3353 
int64_t float64_to_int64(float64 a, float_status *s)
{
    /* Convert using the rounding mode from *s, with no exponent scaling. */
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}
3358 
int32_t float128_to_int32(float128 a, float_status *s)
{
    /* Convert using the rounding mode from *s, with no exponent scaling. */
    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}
3363 
int64_t float128_to_int64(float128 a, float_status *s)
{
    /* Convert using the rounding mode from *s, with no exponent scaling. */
    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}
3368 
Int128 float128_to_int128(float128 a, float_status *s)
{
    /* Convert using the rounding mode from *s, with no exponent scaling. */
    return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
}
3373 
int32_t floatx80_to_int32(floatx80 a, float_status *s)
{
    /* Convert using the rounding mode from *s, with no exponent scaling. */
    return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}
3378 
int64_t floatx80_to_int64(floatx80 a, float_status *s)
{
    /* Convert using the rounding mode from *s, with no exponent scaling. */
    return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}
3383 
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    /* Truncating variant: rounds toward zero regardless of the mode in *s. */
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}
3388 
int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    /* Truncating variant: rounds toward zero regardless of the mode in *s. */
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}
3393 
float16_to_int64_round_to_zero(float16 a,float_status * s)3394 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3395 {
3396     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3397 }
3398 
float32_to_int16_round_to_zero(float32 a,float_status * s)3399 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3400 {
3401     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3402 }
3403 
float32_to_int32_round_to_zero(float32 a,float_status * s)3404 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3405 {
3406     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3407 }
3408 
float32_to_int64_round_to_zero(float32 a,float_status * s)3409 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3410 {
3411     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3412 }
3413 
float64_to_int16_round_to_zero(float64 a,float_status * s)3414 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3415 {
3416     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3417 }
3418 
float64_to_int32_round_to_zero(float64 a,float_status * s)3419 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3420 {
3421     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3422 }
3423 
float64_to_int64_round_to_zero(float64 a,float_status * s)3424 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3425 {
3426     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3427 }
3428 
float128_to_int32_round_to_zero(float128 a,float_status * s)3429 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3430 {
3431     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3432 }
3433 
float128_to_int64_round_to_zero(float128 a,float_status * s)3434 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3435 {
3436     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3437 }
3438 
float128_to_int128_round_to_zero(float128 a,float_status * s)3439 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3440 {
3441     return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3442 }
3443 
floatx80_to_int32_round_to_zero(floatx80 a,float_status * s)3444 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3445 {
3446     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3447 }
3448 
floatx80_to_int64_round_to_zero(floatx80 a,float_status * s)3449 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3450 {
3451     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3452 }
3453 
bfloat16_to_int8(bfloat16 a,float_status * s)3454 int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
3455 {
3456     return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3457 }
3458 
bfloat16_to_int16(bfloat16 a,float_status * s)3459 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3460 {
3461     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3462 }
3463 
bfloat16_to_int32(bfloat16 a,float_status * s)3464 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3465 {
3466     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3467 }
3468 
bfloat16_to_int64(bfloat16 a,float_status * s)3469 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3470 {
3471     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3472 }
3473 
bfloat16_to_int8_round_to_zero(bfloat16 a,float_status * s)3474 int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
3475 {
3476     return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
3477 }
3478 
bfloat16_to_int16_round_to_zero(bfloat16 a,float_status * s)3479 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3480 {
3481     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3482 }
3483 
bfloat16_to_int32_round_to_zero(bfloat16 a,float_status * s)3484 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3485 {
3486     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3487 }
3488 
bfloat16_to_int64_round_to_zero(bfloat16 a,float_status * s)3489 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3490 {
3491     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3492 }
3493 
float64_to_int32_modulo(float64 a,FloatRoundMode rmode,float_status * s)3494 int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
3495                                 float_status *s)
3496 {
3497     FloatParts64 p;
3498 
3499     float64_unpack_canonical(&p, a, s);
3500     return parts_float_to_sint_modulo(&p, rmode, 31, s);
3501 }
3502 
float64_to_int64_modulo(float64 a,FloatRoundMode rmode,float_status * s)3503 int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
3504                                 float_status *s)
3505 {
3506     FloatParts64 p;
3507 
3508     float64_unpack_canonical(&p, a, s);
3509     return parts_float_to_sint_modulo(&p, rmode, 63, s);
3510 }
3511 
3512 /*
3513  * Floating-point to unsigned integer conversions
3514  */
3515 
float16_to_uint8_scalbn(float16 a,FloatRoundMode rmode,int scale,float_status * s)3516 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3517                                 float_status *s)
3518 {
3519     FloatParts64 p;
3520 
3521     float16_unpack_canonical(&p, a, s);
3522     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3523 }
3524 
float16_to_uint16_scalbn(float16 a,FloatRoundMode rmode,int scale,float_status * s)3525 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3526                                   float_status *s)
3527 {
3528     FloatParts64 p;
3529 
3530     float16_unpack_canonical(&p, a, s);
3531     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3532 }
3533 
float16_to_uint32_scalbn(float16 a,FloatRoundMode rmode,int scale,float_status * s)3534 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3535                                   float_status *s)
3536 {
3537     FloatParts64 p;
3538 
3539     float16_unpack_canonical(&p, a, s);
3540     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3541 }
3542 
float16_to_uint64_scalbn(float16 a,FloatRoundMode rmode,int scale,float_status * s)3543 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3544                                   float_status *s)
3545 {
3546     FloatParts64 p;
3547 
3548     float16_unpack_canonical(&p, a, s);
3549     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3550 }
3551 
float32_to_uint16_scalbn(float32 a,FloatRoundMode rmode,int scale,float_status * s)3552 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3553                                   float_status *s)
3554 {
3555     FloatParts64 p;
3556 
3557     float32_unpack_canonical(&p, a, s);
3558     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3559 }
3560 
float32_to_uint32_scalbn(float32 a,FloatRoundMode rmode,int scale,float_status * s)3561 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3562                                   float_status *s)
3563 {
3564     FloatParts64 p;
3565 
3566     float32_unpack_canonical(&p, a, s);
3567     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3568 }
3569 
float32_to_uint64_scalbn(float32 a,FloatRoundMode rmode,int scale,float_status * s)3570 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3571                                   float_status *s)
3572 {
3573     FloatParts64 p;
3574 
3575     float32_unpack_canonical(&p, a, s);
3576     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3577 }
3578 
float64_to_uint16_scalbn(float64 a,FloatRoundMode rmode,int scale,float_status * s)3579 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3580                                   float_status *s)
3581 {
3582     FloatParts64 p;
3583 
3584     float64_unpack_canonical(&p, a, s);
3585     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3586 }
3587 
float64_to_uint32_scalbn(float64 a,FloatRoundMode rmode,int scale,float_status * s)3588 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3589                                   float_status *s)
3590 {
3591     FloatParts64 p;
3592 
3593     float64_unpack_canonical(&p, a, s);
3594     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3595 }
3596 
float64_to_uint64_scalbn(float64 a,FloatRoundMode rmode,int scale,float_status * s)3597 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3598                                   float_status *s)
3599 {
3600     FloatParts64 p;
3601 
3602     float64_unpack_canonical(&p, a, s);
3603     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3604 }
3605 
bfloat16_to_uint8_scalbn(bfloat16 a,FloatRoundMode rmode,int scale,float_status * s)3606 uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
3607                                  int scale, float_status *s)
3608 {
3609     FloatParts64 p;
3610 
3611     bfloat16_unpack_canonical(&p, a, s);
3612     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3613 }
3614 
bfloat16_to_uint16_scalbn(bfloat16 a,FloatRoundMode rmode,int scale,float_status * s)3615 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3616                                    int scale, float_status *s)
3617 {
3618     FloatParts64 p;
3619 
3620     bfloat16_unpack_canonical(&p, a, s);
3621     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3622 }
3623 
bfloat16_to_uint32_scalbn(bfloat16 a,FloatRoundMode rmode,int scale,float_status * s)3624 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3625                                    int scale, float_status *s)
3626 {
3627     FloatParts64 p;
3628 
3629     bfloat16_unpack_canonical(&p, a, s);
3630     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3631 }
3632 
bfloat16_to_uint64_scalbn(bfloat16 a,FloatRoundMode rmode,int scale,float_status * s)3633 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3634                                    int scale, float_status *s)
3635 {
3636     FloatParts64 p;
3637 
3638     bfloat16_unpack_canonical(&p, a, s);
3639     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3640 }
3641 
float128_to_uint32_scalbn(float128 a,FloatRoundMode rmode,int scale,float_status * s)3642 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3643                                           int scale, float_status *s)
3644 {
3645     FloatParts128 p;
3646 
3647     float128_unpack_canonical(&p, a, s);
3648     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3649 }
3650 
float128_to_uint64_scalbn(float128 a,FloatRoundMode rmode,int scale,float_status * s)3651 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3652                                           int scale, float_status *s)
3653 {
3654     FloatParts128 p;
3655 
3656     float128_unpack_canonical(&p, a, s);
3657     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3658 }
3659 
float128_to_uint128_scalbn(float128 a,FloatRoundMode rmode,int scale,float_status * s)3660 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3661                                          int scale, float_status *s)
3662 {
3663     int flags = 0;
3664     Int128 r;
3665     FloatParts128 p;
3666 
3667     float128_unpack_canonical(&p, a, s);
3668 
3669     switch (p.cls) {
3670     case float_class_snan:
3671         flags |= float_flag_invalid_snan;
3672         /* fall through */
3673     case float_class_qnan:
3674         flags |= float_flag_invalid;
3675         r = UINT128_MAX;
3676         break;
3677 
3678     case float_class_inf:
3679         flags = float_flag_invalid | float_flag_invalid_cvti;
3680         r = p.sign ? int128_zero() : UINT128_MAX;
3681         break;
3682 
3683     case float_class_zero:
3684         return int128_zero();
3685 
3686     case float_class_normal:
3687     case float_class_denormal:
3688         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3689             flags = float_flag_inexact;
3690             if (p.cls == float_class_zero) {
3691                 r = int128_zero();
3692                 break;
3693             }
3694         }
3695 
3696         if (p.sign) {
3697             flags = float_flag_invalid | float_flag_invalid_cvti;
3698             r = int128_zero();
3699         } else if (p.exp <= 127) {
3700             int shift = 127 - p.exp;
3701             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3702         } else {
3703             flags = float_flag_invalid | float_flag_invalid_cvti;
3704             r = UINT128_MAX;
3705         }
3706         break;
3707 
3708     default:
3709         g_assert_not_reached();
3710     }
3711 
3712     float_raise(flags, s);
3713     return r;
3714 }
3715 
float16_to_uint8(float16 a,float_status * s)3716 uint8_t float16_to_uint8(float16 a, float_status *s)
3717 {
3718     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3719 }
3720 
float16_to_uint16(float16 a,float_status * s)3721 uint16_t float16_to_uint16(float16 a, float_status *s)
3722 {
3723     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3724 }
3725 
float16_to_uint32(float16 a,float_status * s)3726 uint32_t float16_to_uint32(float16 a, float_status *s)
3727 {
3728     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3729 }
3730 
float16_to_uint64(float16 a,float_status * s)3731 uint64_t float16_to_uint64(float16 a, float_status *s)
3732 {
3733     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3734 }
3735 
float32_to_uint16(float32 a,float_status * s)3736 uint16_t float32_to_uint16(float32 a, float_status *s)
3737 {
3738     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3739 }
3740 
float32_to_uint32(float32 a,float_status * s)3741 uint32_t float32_to_uint32(float32 a, float_status *s)
3742 {
3743     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3744 }
3745 
float32_to_uint64(float32 a,float_status * s)3746 uint64_t float32_to_uint64(float32 a, float_status *s)
3747 {
3748     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3749 }
3750 
float64_to_uint16(float64 a,float_status * s)3751 uint16_t float64_to_uint16(float64 a, float_status *s)
3752 {
3753     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3754 }
3755 
float64_to_uint32(float64 a,float_status * s)3756 uint32_t float64_to_uint32(float64 a, float_status *s)
3757 {
3758     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3759 }
3760 
float64_to_uint64(float64 a,float_status * s)3761 uint64_t float64_to_uint64(float64 a, float_status *s)
3762 {
3763     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3764 }
3765 
float128_to_uint32(float128 a,float_status * s)3766 uint32_t float128_to_uint32(float128 a, float_status *s)
3767 {
3768     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3769 }
3770 
float128_to_uint64(float128 a,float_status * s)3771 uint64_t float128_to_uint64(float128 a, float_status *s)
3772 {
3773     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3774 }
3775 
float128_to_uint128(float128 a,float_status * s)3776 Int128 float128_to_uint128(float128 a, float_status *s)
3777 {
3778     return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3779 }
3780 
float16_to_uint16_round_to_zero(float16 a,float_status * s)3781 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3782 {
3783     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3784 }
3785 
float16_to_uint32_round_to_zero(float16 a,float_status * s)3786 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3787 {
3788     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3789 }
3790 
float16_to_uint64_round_to_zero(float16 a,float_status * s)3791 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3792 {
3793     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3794 }
3795 
float32_to_uint16_round_to_zero(float32 a,float_status * s)3796 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3797 {
3798     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3799 }
3800 
float32_to_uint32_round_to_zero(float32 a,float_status * s)3801 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3802 {
3803     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3804 }
3805 
float32_to_uint64_round_to_zero(float32 a,float_status * s)3806 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3807 {
3808     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3809 }
3810 
float64_to_uint16_round_to_zero(float64 a,float_status * s)3811 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3812 {
3813     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3814 }
3815 
float64_to_uint32_round_to_zero(float64 a,float_status * s)3816 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3817 {
3818     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3819 }
3820 
float64_to_uint64_round_to_zero(float64 a,float_status * s)3821 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3822 {
3823     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3824 }
3825 
float128_to_uint32_round_to_zero(float128 a,float_status * s)3826 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3827 {
3828     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3829 }
3830 
float128_to_uint64_round_to_zero(float128 a,float_status * s)3831 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3832 {
3833     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3834 }
3835 
float128_to_uint128_round_to_zero(float128 a,float_status * s)3836 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3837 {
3838     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3839 }
3840 
bfloat16_to_uint8(bfloat16 a,float_status * s)3841 uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
3842 {
3843     return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3844 }
3845 
bfloat16_to_uint16(bfloat16 a,float_status * s)3846 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3847 {
3848     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3849 }
3850 
bfloat16_to_uint32(bfloat16 a,float_status * s)3851 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3852 {
3853     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3854 }
3855 
bfloat16_to_uint64(bfloat16 a,float_status * s)3856 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3857 {
3858     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3859 }
3860 
bfloat16_to_uint8_round_to_zero(bfloat16 a,float_status * s)3861 uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
3862 {
3863     return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
3864 }
3865 
bfloat16_to_uint16_round_to_zero(bfloat16 a,float_status * s)3866 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3867 {
3868     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3869 }
3870 
bfloat16_to_uint32_round_to_zero(bfloat16 a,float_status * s)3871 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3872 {
3873     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3874 }
3875 
bfloat16_to_uint64_round_to_zero(bfloat16 a,float_status * s)3876 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3877 {
3878     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3879 }
3880 
3881 /*
3882  * Signed integer to floating-point conversions
3883  */
3884 
int64_to_float16_scalbn(int64_t a,int scale,float_status * status)3885 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3886 {
3887     FloatParts64 p;
3888 
3889     parts_sint_to_float(&p, a, scale, status);
3890     return float16_round_pack_canonical(&p, status);
3891 }
3892 
int32_to_float16_scalbn(int32_t a,int scale,float_status * status)3893 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3894 {
3895     return int64_to_float16_scalbn(a, scale, status);
3896 }
3897 
int16_to_float16_scalbn(int16_t a,int scale,float_status * status)3898 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3899 {
3900     return int64_to_float16_scalbn(a, scale, status);
3901 }
3902 
int64_to_float16(int64_t a,float_status * status)3903 float16 int64_to_float16(int64_t a, float_status *status)
3904 {
3905     return int64_to_float16_scalbn(a, 0, status);
3906 }
3907 
int32_to_float16(int32_t a,float_status * status)3908 float16 int32_to_float16(int32_t a, float_status *status)
3909 {
3910     return int64_to_float16_scalbn(a, 0, status);
3911 }
3912 
int16_to_float16(int16_t a,float_status * status)3913 float16 int16_to_float16(int16_t a, float_status *status)
3914 {
3915     return int64_to_float16_scalbn(a, 0, status);
3916 }
3917 
int8_to_float16(int8_t a,float_status * status)3918 float16 int8_to_float16(int8_t a, float_status *status)
3919 {
3920     return int64_to_float16_scalbn(a, 0, status);
3921 }
3922 
int64_to_float32_scalbn(int64_t a,int scale,float_status * status)3923 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3924 {
3925     FloatParts64 p;
3926 
3927     /* Without scaling, there are no overflow concerns. */
3928     if (likely(scale == 0) && can_use_fpu(status)) {
3929         union_float32 ur;
3930         ur.h = a;
3931         return ur.s;
3932     }
3933 
3934     parts64_sint_to_float(&p, a, scale, status);
3935     return float32_round_pack_canonical(&p, status);
3936 }
3937 
int32_to_float32_scalbn(int32_t a,int scale,float_status * status)3938 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3939 {
3940     return int64_to_float32_scalbn(a, scale, status);
3941 }
3942 
int16_to_float32_scalbn(int16_t a,int scale,float_status * status)3943 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3944 {
3945     return int64_to_float32_scalbn(a, scale, status);
3946 }
3947 
int64_to_float32(int64_t a,float_status * status)3948 float32 int64_to_float32(int64_t a, float_status *status)
3949 {
3950     return int64_to_float32_scalbn(a, 0, status);
3951 }
3952 
int32_to_float32(int32_t a,float_status * status)3953 float32 int32_to_float32(int32_t a, float_status *status)
3954 {
3955     return int64_to_float32_scalbn(a, 0, status);
3956 }
3957 
int16_to_float32(int16_t a,float_status * status)3958 float32 int16_to_float32(int16_t a, float_status *status)
3959 {
3960     return int64_to_float32_scalbn(a, 0, status);
3961 }
3962 
int64_to_float64_scalbn(int64_t a,int scale,float_status * status)3963 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3964 {
3965     FloatParts64 p;
3966 
3967     /* Without scaling, there are no overflow concerns. */
3968     if (likely(scale == 0) && can_use_fpu(status)) {
3969         union_float64 ur;
3970         ur.h = a;
3971         return ur.s;
3972     }
3973 
3974     parts_sint_to_float(&p, a, scale, status);
3975     return float64_round_pack_canonical(&p, status);
3976 }
3977 
int32_to_float64_scalbn(int32_t a,int scale,float_status * status)3978 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3979 {
3980     return int64_to_float64_scalbn(a, scale, status);
3981 }
3982 
int16_to_float64_scalbn(int16_t a,int scale,float_status * status)3983 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3984 {
3985     return int64_to_float64_scalbn(a, scale, status);
3986 }
3987 
int64_to_float64(int64_t a,float_status * status)3988 float64 int64_to_float64(int64_t a, float_status *status)
3989 {
3990     return int64_to_float64_scalbn(a, 0, status);
3991 }
3992 
int32_to_float64(int32_t a,float_status * status)3993 float64 int32_to_float64(int32_t a, float_status *status)
3994 {
3995     return int64_to_float64_scalbn(a, 0, status);
3996 }
3997 
int16_to_float64(int16_t a,float_status * status)3998 float64 int16_to_float64(int16_t a, float_status *status)
3999 {
4000     return int64_to_float64_scalbn(a, 0, status);
4001 }
4002 
int64_to_bfloat16_scalbn(int64_t a,int scale,float_status * status)4003 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
4004 {
4005     FloatParts64 p;
4006 
4007     parts_sint_to_float(&p, a, scale, status);
4008     return bfloat16_round_pack_canonical(&p, status);
4009 }
4010 
int32_to_bfloat16_scalbn(int32_t a,int scale,float_status * status)4011 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
4012 {
4013     return int64_to_bfloat16_scalbn(a, scale, status);
4014 }
4015 
int16_to_bfloat16_scalbn(int16_t a,int scale,float_status * status)4016 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
4017 {
4018     return int64_to_bfloat16_scalbn(a, scale, status);
4019 }
4020 
int8_to_bfloat16_scalbn(int8_t a,int scale,float_status * status)4021 bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
4022 {
4023     return int64_to_bfloat16_scalbn(a, scale, status);
4024 }
4025 
int64_to_bfloat16(int64_t a,float_status * status)4026 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
4027 {
4028     return int64_to_bfloat16_scalbn(a, 0, status);
4029 }
4030 
int32_to_bfloat16(int32_t a,float_status * status)4031 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
4032 {
4033     return int64_to_bfloat16_scalbn(a, 0, status);
4034 }
4035 
int16_to_bfloat16(int16_t a,float_status * status)4036 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
4037 {
4038     return int64_to_bfloat16_scalbn(a, 0, status);
4039 }
4040 
int8_to_bfloat16(int8_t a,float_status * status)4041 bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
4042 {
4043     return int64_to_bfloat16_scalbn(a, 0, status);
4044 }
4045 
int128_to_float128(Int128 a,float_status * status)4046 float128 int128_to_float128(Int128 a, float_status *status)
4047 {
4048     FloatParts128 p = { };
4049     int shift;
4050 
4051     if (int128_nz(a)) {
4052         p.cls = float_class_normal;
4053         if (!int128_nonneg(a)) {
4054             p.sign = true;
4055             a = int128_neg(a);
4056         }
4057 
4058         shift = clz64(int128_gethi(a));
4059         if (shift == 64) {
4060             shift += clz64(int128_getlo(a));
4061         }
4062 
4063         p.exp = 127 - shift;
4064         a = int128_lshift(a, shift);
4065 
4066         p.frac_hi = int128_gethi(a);
4067         p.frac_lo = int128_getlo(a);
4068     } else {
4069         p.cls = float_class_zero;
4070     }
4071 
4072     return float128_round_pack_canonical(&p, status);
4073 }
4074 
int64_to_float128(int64_t a,float_status * status)4075 float128 int64_to_float128(int64_t a, float_status *status)
4076 {
4077     FloatParts128 p;
4078 
4079     parts_sint_to_float(&p, a, 0, status);
4080     return float128_round_pack_canonical(&p, status);
4081 }
4082 
int32_to_float128(int32_t a,float_status * status)4083 float128 int32_to_float128(int32_t a, float_status *status)
4084 {
4085     return int64_to_float128(a, status);
4086 }
4087 
int64_to_floatx80(int64_t a,float_status * status)4088 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4089 {
4090     FloatParts128 p;
4091 
4092     parts_sint_to_float(&p, a, 0, status);
4093     return floatx80_round_pack_canonical(&p, status);
4094 }
4095 
int32_to_floatx80(int32_t a,float_status * status)4096 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4097 {
4098     return int64_to_floatx80(a, status);
4099 }
4100 
4101 /*
4102  * Unsigned Integer to floating-point conversions
4103  */
4104 
uint64_to_float16_scalbn(uint64_t a,int scale,float_status * status)4105 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
4106 {
4107     FloatParts64 p;
4108 
4109     parts_uint_to_float(&p, a, scale, status);
4110     return float16_round_pack_canonical(&p, status);
4111 }
4112 
uint32_to_float16_scalbn(uint32_t a,int scale,float_status * status)4113 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
4114 {
4115     return uint64_to_float16_scalbn(a, scale, status);
4116 }
4117 
uint16_to_float16_scalbn(uint16_t a,int scale,float_status * status)4118 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
4119 {
4120     return uint64_to_float16_scalbn(a, scale, status);
4121 }
4122 
uint64_to_float16(uint64_t a,float_status * status)4123 float16 uint64_to_float16(uint64_t a, float_status *status)
4124 {
4125     return uint64_to_float16_scalbn(a, 0, status);
4126 }
4127 
uint32_to_float16(uint32_t a,float_status * status)4128 float16 uint32_to_float16(uint32_t a, float_status *status)
4129 {
4130     return uint64_to_float16_scalbn(a, 0, status);
4131 }
4132 
uint16_to_float16(uint16_t a,float_status * status)4133 float16 uint16_to_float16(uint16_t a, float_status *status)
4134 {
4135     return uint64_to_float16_scalbn(a, 0, status);
4136 }
4137 
uint8_to_float16(uint8_t a,float_status * status)4138 float16 uint8_to_float16(uint8_t a, float_status *status)
4139 {
4140     return uint64_to_float16_scalbn(a, 0, status);
4141 }
4142 
uint64_to_float32_scalbn(uint64_t a,int scale,float_status * status)4143 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4144 {
4145     FloatParts64 p;
4146 
4147     /* Without scaling, there are no overflow concerns. */
4148     if (likely(scale == 0) && can_use_fpu(status)) {
4149         union_float32 ur;
4150         ur.h = a;
4151         return ur.s;
4152     }
4153 
4154     parts_uint_to_float(&p, a, scale, status);
4155     return float32_round_pack_canonical(&p, status);
4156 }
4157 
uint32_to_float32_scalbn(uint32_t a,int scale,float_status * status)4158 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4159 {
4160     return uint64_to_float32_scalbn(a, scale, status);
4161 }
4162 
uint16_to_float32_scalbn(uint16_t a,int scale,float_status * status)4163 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4164 {
4165     return uint64_to_float32_scalbn(a, scale, status);
4166 }
4167 
uint64_to_float32(uint64_t a,float_status * status)4168 float32 uint64_to_float32(uint64_t a, float_status *status)
4169 {
4170     return uint64_to_float32_scalbn(a, 0, status);
4171 }
4172 
uint32_to_float32(uint32_t a,float_status * status)4173 float32 uint32_to_float32(uint32_t a, float_status *status)
4174 {
4175     return uint64_to_float32_scalbn(a, 0, status);
4176 }
4177 
uint16_to_float32(uint16_t a,float_status * status)4178 float32 uint16_to_float32(uint16_t a, float_status *status)
4179 {
4180     return uint64_to_float32_scalbn(a, 0, status);
4181 }
4182 
uint64_to_float64_scalbn(uint64_t a,int scale,float_status * status)4183 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4184 {
4185     FloatParts64 p;
4186 
4187     /* Without scaling, there are no overflow concerns. */
4188     if (likely(scale == 0) && can_use_fpu(status)) {
4189         union_float64 ur;
4190         ur.h = a;
4191         return ur.s;
4192     }
4193 
4194     parts_uint_to_float(&p, a, scale, status);
4195     return float64_round_pack_canonical(&p, status);
4196 }
4197 
uint32_to_float64_scalbn(uint32_t a,int scale,float_status * status)4198 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4199 {
4200     return uint64_to_float64_scalbn(a, scale, status);
4201 }
4202 
uint16_to_float64_scalbn(uint16_t a,int scale,float_status * status)4203 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4204 {
4205     return uint64_to_float64_scalbn(a, scale, status);
4206 }
4207 
uint64_to_float64(uint64_t a,float_status * status)4208 float64 uint64_to_float64(uint64_t a, float_status *status)
4209 {
4210     return uint64_to_float64_scalbn(a, 0, status);
4211 }
4212 
uint32_to_float64(uint32_t a,float_status * status)4213 float64 uint32_to_float64(uint32_t a, float_status *status)
4214 {
4215     return uint64_to_float64_scalbn(a, 0, status);
4216 }
4217 
uint16_to_float64(uint16_t a,float_status * status)4218 float64 uint16_to_float64(uint16_t a, float_status *status)
4219 {
4220     return uint64_to_float64_scalbn(a, 0, status);
4221 }
4222 
uint64_to_bfloat16_scalbn(uint64_t a,int scale,float_status * status)4223 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4224 {
4225     FloatParts64 p;
4226 
4227     parts_uint_to_float(&p, a, scale, status);
4228     return bfloat16_round_pack_canonical(&p, status);
4229 }
4230 
uint32_to_bfloat16_scalbn(uint32_t a,int scale,float_status * status)4231 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4232 {
4233     return uint64_to_bfloat16_scalbn(a, scale, status);
4234 }
4235 
uint16_to_bfloat16_scalbn(uint16_t a,int scale,float_status * status)4236 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4237 {
4238     return uint64_to_bfloat16_scalbn(a, scale, status);
4239 }
4240 
uint8_to_bfloat16_scalbn(uint8_t a,int scale,float_status * status)4241 bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
4242 {
4243     return uint64_to_bfloat16_scalbn(a, scale, status);
4244 }
4245 
uint64_to_bfloat16(uint64_t a,float_status * status)4246 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4247 {
4248     return uint64_to_bfloat16_scalbn(a, 0, status);
4249 }
4250 
uint32_to_bfloat16(uint32_t a,float_status * status)4251 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4252 {
4253     return uint64_to_bfloat16_scalbn(a, 0, status);
4254 }
4255 
uint16_to_bfloat16(uint16_t a,float_status * status)4256 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4257 {
4258     return uint64_to_bfloat16_scalbn(a, 0, status);
4259 }
4260 
uint8_to_bfloat16(uint8_t a,float_status * status)4261 bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
4262 {
4263     return uint64_to_bfloat16_scalbn(a, 0, status);
4264 }
4265 
uint64_to_float128(uint64_t a,float_status * status)4266 float128 uint64_to_float128(uint64_t a, float_status *status)
4267 {
4268     FloatParts128 p;
4269 
4270     parts_uint_to_float(&p, a, 0, status);
4271     return float128_round_pack_canonical(&p, status);
4272 }
4273 
/* Convert an unsigned 128-bit integer to float128. */
float128 uint128_to_float128(Int128 a, float_status *status)
{
    FloatParts128 p = { };
    int shift;

    if (int128_nz(a)) {
        p.cls = float_class_normal;

        /* Count leading zeros across the full 128-bit value. */
        shift = clz64(int128_gethi(a));
        if (shift == 64) {
            shift += clz64(int128_getlo(a));
        }

        /* Normalize so the most significant set bit has weight 2**127. */
        p.exp = 127 - shift;
        a = int128_lshift(a, shift);

        p.frac_hi = int128_gethi(a);
        p.frac_lo = int128_getlo(a);
    } else {
        p.cls = float_class_zero;
    }

    /* Rounding may still be required: 128 bits exceed the significand. */
    return float128_round_pack_canonical(&p, status);
}
4298 
4299 /*
4300  * Minimum and maximum
4301  */
4302 
float16_minmax(float16 a,float16 b,float_status * s,int flags)4303 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4304 {
4305     FloatParts64 pa, pb, *pr;
4306 
4307     float16_unpack_canonical(&pa, a, s);
4308     float16_unpack_canonical(&pb, b, s);
4309     pr = parts_minmax(&pa, &pb, s, flags);
4310 
4311     return float16_round_pack_canonical(pr, s);
4312 }
4313 
bfloat16_minmax(bfloat16 a,bfloat16 b,float_status * s,int flags)4314 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4315                                 float_status *s, int flags)
4316 {
4317     FloatParts64 pa, pb, *pr;
4318 
4319     bfloat16_unpack_canonical(&pa, a, s);
4320     bfloat16_unpack_canonical(&pb, b, s);
4321     pr = parts_minmax(&pa, &pb, s, flags);
4322 
4323     return bfloat16_round_pack_canonical(pr, s);
4324 }
4325 
float32_minmax(float32 a,float32 b,float_status * s,int flags)4326 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4327 {
4328     FloatParts64 pa, pb, *pr;
4329 
4330     float32_unpack_canonical(&pa, a, s);
4331     float32_unpack_canonical(&pb, b, s);
4332     pr = parts_minmax(&pa, &pb, s, flags);
4333 
4334     return float32_round_pack_canonical(pr, s);
4335 }
4336 
float64_minmax(float64 a,float64 b,float_status * s,int flags)4337 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4338 {
4339     FloatParts64 pa, pb, *pr;
4340 
4341     float64_unpack_canonical(&pa, a, s);
4342     float64_unpack_canonical(&pb, b, s);
4343     pr = parts_minmax(&pa, &pb, s, flags);
4344 
4345     return float64_round_pack_canonical(pr, s);
4346 }
4347 
float128_minmax(float128 a,float128 b,float_status * s,int flags)4348 static float128 float128_minmax(float128 a, float128 b,
4349                                 float_status *s, int flags)
4350 {
4351     FloatParts128 pa, pb, *pr;
4352 
4353     float128_unpack_canonical(&pa, a, s);
4354     float128_unpack_canonical(&pb, b, s);
4355     pr = parts_minmax(&pa, &pb, s, flags);
4356 
4357     return float128_round_pack_canonical(pr, s);
4358 }
4359 
/*
 * Instantiate the public min/max entry points for each format.
 * MINMAX_1 emits one wrapper around the per-format *_minmax worker;
 * MINMAX_2 emits the whole family: max/min, the *num variants, the
 * magnitude-comparing *nummag variants, and the
 * maximum_number/minimum_number operations.
 */
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

#define MINMAX_2(type) \
    MINMAX_1(type, max, 0)                                                \
    MINMAX_1(type, maxnum, minmax_isnum)                                  \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
    MINMAX_1(type, maximum_number, minmax_isnumber)                       \
    MINMAX_1(type, min, minmax_ismin)                                     \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
    MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \

MINMAX_2(float16)
MINMAX_2(bfloat16)
MINMAX_2(float32)
MINMAX_2(float64)
MINMAX_2(float128)

#undef MINMAX_1
#undef MINMAX_2
4382 
4383 /*
4384  * Floating point compare
4385  */
4386 
4387 static FloatRelation QEMU_FLATTEN
4388 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4389 {
4390     FloatParts64 pa, pb;
4391 
4392     float16_unpack_canonical(&pa, a, s);
4393     float16_unpack_canonical(&pb, b, s);
4394     return parts_compare(&pa, &pb, s, is_quiet);
4395 }
4396 
float16_compare(float16 a,float16 b,float_status * s)4397 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4398 {
4399     return float16_do_compare(a, b, s, false);
4400 }
4401 
float16_compare_quiet(float16 a,float16 b,float_status * s)4402 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4403 {
4404     return float16_do_compare(a, b, s, true);
4405 }
4406 
4407 static FloatRelation QEMU_SOFTFLOAT_ATTR
float32_do_compare(float32 a,float32 b,float_status * s,bool is_quiet)4408 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4409 {
4410     FloatParts64 pa, pb;
4411 
4412     float32_unpack_canonical(&pa, a, s);
4413     float32_unpack_canonical(&pb, b, s);
4414     return parts_compare(&pa, &pb, s, is_quiet);
4415 }
4416 
/*
 * Hard-float fast path for float32 comparison.  Uses the host's ISO C
 * comparison macros where possible; denormal inputs and the unordered
 * case fall back to softfloat, which handles status-flag updates.
 */
static FloatRelation QEMU_FLATTEN
float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    if (unlikely(float32_is_denormal(ua.s) || float32_is_denormal(ub.s))) {
        /* We may need to set the input_denormal_used flag */
        goto soft;
    }

    /* isgreater/isless are quiet: they do not trap on quiet NaNs. */
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float32_do_compare(ua.s, ub.s, s, is_quiet);
}
4450 
float32_compare(float32 a,float32 b,float_status * s)4451 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4452 {
4453     return float32_hs_compare(a, b, s, false);
4454 }
4455 
float32_compare_quiet(float32 a,float32 b,float_status * s)4456 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4457 {
4458     return float32_hs_compare(a, b, s, true);
4459 }
4460 
4461 static FloatRelation QEMU_SOFTFLOAT_ATTR
float64_do_compare(float64 a,float64 b,float_status * s,bool is_quiet)4462 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4463 {
4464     FloatParts64 pa, pb;
4465 
4466     float64_unpack_canonical(&pa, a, s);
4467     float64_unpack_canonical(&pb, b, s);
4468     return parts_compare(&pa, &pb, s, is_quiet);
4469 }
4470 
/*
 * Hard-float fast path for float64 comparison.  Uses the host's ISO C
 * comparison macros where possible; denormal inputs and the unordered
 * case fall back to softfloat, which handles status-flag updates.
 */
static FloatRelation QEMU_FLATTEN
float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    if (unlikely(float64_is_denormal(ua.s) || float64_is_denormal(ub.s))) {
        /* We may need to set the input_denormal_used flag */
        goto soft;
    }

    /* isgreater/isless are quiet: they do not trap on quiet NaNs. */
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float64_do_compare(ua.s, ub.s, s, is_quiet);
}
4504 
float64_compare(float64 a,float64 b,float_status * s)4505 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4506 {
4507     return float64_hs_compare(a, b, s, false);
4508 }
4509 
float64_compare_quiet(float64 a,float64 b,float_status * s)4510 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4511 {
4512     return float64_hs_compare(a, b, s, true);
4513 }
4514 
4515 static FloatRelation QEMU_FLATTEN
bfloat16_do_compare(bfloat16 a,bfloat16 b,float_status * s,bool is_quiet)4516 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4517 {
4518     FloatParts64 pa, pb;
4519 
4520     bfloat16_unpack_canonical(&pa, a, s);
4521     bfloat16_unpack_canonical(&pb, b, s);
4522     return parts_compare(&pa, &pb, s, is_quiet);
4523 }
4524 
bfloat16_compare(bfloat16 a,bfloat16 b,float_status * s)4525 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4526 {
4527     return bfloat16_do_compare(a, b, s, false);
4528 }
4529 
bfloat16_compare_quiet(bfloat16 a,bfloat16 b,float_status * s)4530 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4531 {
4532     return bfloat16_do_compare(a, b, s, true);
4533 }
4534 
4535 static FloatRelation QEMU_FLATTEN
float128_do_compare(float128 a,float128 b,float_status * s,bool is_quiet)4536 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4537 {
4538     FloatParts128 pa, pb;
4539 
4540     float128_unpack_canonical(&pa, a, s);
4541     float128_unpack_canonical(&pb, b, s);
4542     return parts_compare(&pa, &pb, s, is_quiet);
4543 }
4544 
float128_compare(float128 a,float128 b,float_status * s)4545 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4546 {
4547     return float128_do_compare(a, b, s, false);
4548 }
4549 
float128_compare_quiet(float128 a,float128 b,float_status * s)4550 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4551 {
4552     return float128_do_compare(a, b, s, true);
4553 }
4554 
4555 static FloatRelation QEMU_FLATTEN
floatx80_do_compare(floatx80 a,floatx80 b,float_status * s,bool is_quiet)4556 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4557 {
4558     FloatParts128 pa, pb;
4559 
4560     if (!floatx80_unpack_canonical(&pa, a, s) ||
4561         !floatx80_unpack_canonical(&pb, b, s)) {
4562         return float_relation_unordered;
4563     }
4564     return parts_compare(&pa, &pb, s, is_quiet);
4565 }
4566 
floatx80_compare(floatx80 a,floatx80 b,float_status * s)4567 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4568 {
4569     return floatx80_do_compare(a, b, s, false);
4570 }
4571 
floatx80_compare_quiet(floatx80 a,floatx80 b,float_status * s)4572 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4573 {
4574     return floatx80_do_compare(a, b, s, true);
4575 }
4576 
4577 /*
4578  * Scale by 2**N
4579  */
4580 
float16_scalbn(float16 a,int n,float_status * status)4581 float16 float16_scalbn(float16 a, int n, float_status *status)
4582 {
4583     FloatParts64 p;
4584 
4585     float16_unpack_canonical(&p, a, status);
4586     parts_scalbn(&p, n, status);
4587     return float16_round_pack_canonical(&p, status);
4588 }
4589 
float32_scalbn(float32 a,int n,float_status * status)4590 float32 float32_scalbn(float32 a, int n, float_status *status)
4591 {
4592     FloatParts64 p;
4593 
4594     float32_unpack_canonical(&p, a, status);
4595     parts_scalbn(&p, n, status);
4596     return float32_round_pack_canonical(&p, status);
4597 }
4598 
float64_scalbn(float64 a,int n,float_status * status)4599 float64 float64_scalbn(float64 a, int n, float_status *status)
4600 {
4601     FloatParts64 p;
4602 
4603     float64_unpack_canonical(&p, a, status);
4604     parts_scalbn(&p, n, status);
4605     return float64_round_pack_canonical(&p, status);
4606 }
4607 
bfloat16_scalbn(bfloat16 a,int n,float_status * status)4608 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4609 {
4610     FloatParts64 p;
4611 
4612     bfloat16_unpack_canonical(&p, a, status);
4613     parts_scalbn(&p, n, status);
4614     return bfloat16_round_pack_canonical(&p, status);
4615 }
4616 
float128_scalbn(float128 a,int n,float_status * status)4617 float128 float128_scalbn(float128 a, int n, float_status *status)
4618 {
4619     FloatParts128 p;
4620 
4621     float128_unpack_canonical(&p, a, status);
4622     parts_scalbn(&p, n, status);
4623     return float128_round_pack_canonical(&p, status);
4624 }
4625 
floatx80_scalbn(floatx80 a,int n,float_status * status)4626 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4627 {
4628     FloatParts128 p;
4629 
4630     if (!floatx80_unpack_canonical(&p, a, status)) {
4631         return floatx80_default_nan(status);
4632     }
4633     parts_scalbn(&p, n, status);
4634     return floatx80_round_pack_canonical(&p, status);
4635 }
4636 
4637 /*
4638  * Square Root
4639  */
4640 
float16_sqrt(float16 a,float_status * status)4641 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4642 {
4643     FloatParts64 p;
4644 
4645     float16_unpack_canonical(&p, a, status);
4646     parts_sqrt(&p, status, &float16_params);
4647     return float16_round_pack_canonical(&p, status);
4648 }
4649 
4650 static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a,float_status * status)4651 soft_f32_sqrt(float32 a, float_status *status)
4652 {
4653     FloatParts64 p;
4654 
4655     float32_unpack_canonical(&p, a, status);
4656     parts_sqrt(&p, status, &float32_params);
4657     return float32_round_pack_canonical(&p, status);
4658 }
4659 
4660 static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a,float_status * status)4661 soft_f64_sqrt(float64 a, float_status *status)
4662 {
4663     FloatParts64 p;
4664 
4665     float64_unpack_canonical(&p, a, status);
4666     parts_sqrt(&p, status, &float64_params);
4667     return float64_round_pack_canonical(&p, status);
4668 }
4669 
/*
 * Square root of a float32.  Uses the host sqrtf() when the status
 * flags permit hardfloat and the input is positive zero or a positive
 * normal number; everything else (negatives, denormals, NaNs,
 * infinities) goes through the softfloat path for flag handling.
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        /* Classify with the host's fpclassify/signbit. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}
4696 
/*
 * Square root of a float64.  Uses the host sqrt() when the status
 * flags permit hardfloat and the input is positive zero or a positive
 * normal number; everything else (negatives, denormals, NaNs,
 * infinities) goes through the softfloat path for flag handling.
 */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        /* Classify with the host's fpclassify/signbit. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
4723 
float64r32_sqrt(float64 a,float_status * status)4724 float64 float64r32_sqrt(float64 a, float_status *status)
4725 {
4726     FloatParts64 p;
4727 
4728     float64_unpack_canonical(&p, a, status);
4729     parts_sqrt(&p, status, &float64_params);
4730     return float64r32_round_pack_canonical(&p, status);
4731 }
4732 
bfloat16_sqrt(bfloat16 a,float_status * status)4733 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4734 {
4735     FloatParts64 p;
4736 
4737     bfloat16_unpack_canonical(&p, a, status);
4738     parts_sqrt(&p, status, &bfloat16_params);
4739     return bfloat16_round_pack_canonical(&p, status);
4740 }
4741 
float128_sqrt(float128 a,float_status * status)4742 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4743 {
4744     FloatParts128 p;
4745 
4746     float128_unpack_canonical(&p, a, status);
4747     parts_sqrt(&p, status, &float128_params);
4748     return float128_round_pack_canonical(&p, status);
4749 }
4750 
floatx80_sqrt(floatx80 a,float_status * s)4751 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4752 {
4753     FloatParts128 p;
4754 
4755     if (!floatx80_unpack_canonical(&p, a, s)) {
4756         return floatx80_default_nan(s);
4757     }
4758     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4759     return floatx80_round_pack_canonical(&p, s);
4760 }
4761 
4762 /*
4763  * log2
4764  */
float32_log2(float32 a,float_status * status)4765 float32 float32_log2(float32 a, float_status *status)
4766 {
4767     FloatParts64 p;
4768 
4769     float32_unpack_canonical(&p, a, status);
4770     parts_log2(&p, status, &float32_params);
4771     return float32_round_pack_canonical(&p, status);
4772 }
4773 
float64_log2(float64 a,float_status * status)4774 float64 float64_log2(float64 a, float_status *status)
4775 {
4776     FloatParts64 p;
4777 
4778     float64_unpack_canonical(&p, a, status);
4779     parts_log2(&p, status, &float64_params);
4780     return float64_round_pack_canonical(&p, status);
4781 }
4782 
4783 /*----------------------------------------------------------------------------
4784 | The pattern for a default generated NaN.
4785 *----------------------------------------------------------------------------*/
4786 
float16_default_nan(float_status * status)4787 float16 float16_default_nan(float_status *status)
4788 {
4789     FloatParts64 p;
4790 
4791     parts_default_nan(&p, status);
4792     p.frac >>= float16_params.frac_shift;
4793     return float16_pack_raw(&p);
4794 }
4795 
float32_default_nan(float_status * status)4796 float32 float32_default_nan(float_status *status)
4797 {
4798     FloatParts64 p;
4799 
4800     parts_default_nan(&p, status);
4801     p.frac >>= float32_params.frac_shift;
4802     return float32_pack_raw(&p);
4803 }
4804 
float64_default_nan(float_status * status)4805 float64 float64_default_nan(float_status *status)
4806 {
4807     FloatParts64 p;
4808 
4809     parts_default_nan(&p, status);
4810     p.frac >>= float64_params.frac_shift;
4811     return float64_pack_raw(&p);
4812 }
4813 
float128_default_nan(float_status * status)4814 float128 float128_default_nan(float_status *status)
4815 {
4816     FloatParts128 p;
4817 
4818     parts_default_nan(&p, status);
4819     frac_shr(&p, float128_params.frac_shift);
4820     return float128_pack_raw(&p);
4821 }
4822 
bfloat16_default_nan(float_status * status)4823 bfloat16 bfloat16_default_nan(float_status *status)
4824 {
4825     FloatParts64 p;
4826 
4827     parts_default_nan(&p, status);
4828     p.frac >>= bfloat16_params.frac_shift;
4829     return bfloat16_pack_raw(&p);
4830 }
4831 
4832 /*----------------------------------------------------------------------------
4833 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4834 *----------------------------------------------------------------------------*/
4835 
float16_silence_nan(float16 a,float_status * status)4836 float16 float16_silence_nan(float16 a, float_status *status)
4837 {
4838     FloatParts64 p;
4839 
4840     float16_unpack_raw(&p, a);
4841     p.frac <<= float16_params.frac_shift;
4842     parts_silence_nan(&p, status);
4843     p.frac >>= float16_params.frac_shift;
4844     return float16_pack_raw(&p);
4845 }
4846 
float32_silence_nan(float32 a,float_status * status)4847 float32 float32_silence_nan(float32 a, float_status *status)
4848 {
4849     FloatParts64 p;
4850 
4851     float32_unpack_raw(&p, a);
4852     p.frac <<= float32_params.frac_shift;
4853     parts_silence_nan(&p, status);
4854     p.frac >>= float32_params.frac_shift;
4855     return float32_pack_raw(&p);
4856 }
4857 
float64_silence_nan(float64 a,float_status * status)4858 float64 float64_silence_nan(float64 a, float_status *status)
4859 {
4860     FloatParts64 p;
4861 
4862     float64_unpack_raw(&p, a);
4863     p.frac <<= float64_params.frac_shift;
4864     parts_silence_nan(&p, status);
4865     p.frac >>= float64_params.frac_shift;
4866     return float64_pack_raw(&p);
4867 }
4868 
bfloat16_silence_nan(bfloat16 a,float_status * status)4869 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4870 {
4871     FloatParts64 p;
4872 
4873     bfloat16_unpack_raw(&p, a);
4874     p.frac <<= bfloat16_params.frac_shift;
4875     parts_silence_nan(&p, status);
4876     p.frac >>= bfloat16_params.frac_shift;
4877     return bfloat16_pack_raw(&p);
4878 }
4879 
float128_silence_nan(float128 a,float_status * status)4880 float128 float128_silence_nan(float128 a, float_status *status)
4881 {
4882     FloatParts128 p;
4883 
4884     float128_unpack_raw(&p, a);
4885     frac_shl(&p, float128_params.frac_shift);
4886     parts_silence_nan(&p, status);
4887     frac_shr(&p, float128_params.frac_shift);
4888     return float128_pack_raw(&p);
4889 }
4890 
4891 /*----------------------------------------------------------------------------
4892 | If `a' is denormal and we are in flush-to-zero mode then set the
4893 | input-denormal exception and return zero. Otherwise just return the value.
4894 *----------------------------------------------------------------------------*/
4895 
/*
 * Return true (and raise input_denormal_flushed) if the raw-unpacked
 * parts describe a denormal: biased exponent zero with a nonzero
 * fraction.
 */
static bool parts_squash_denormal(FloatParts64 p, float_status *status)
{
    if (p.exp != 0 || p.frac == 0) {
        return false;
    }
    float_raise(float_flag_input_denormal_flushed, status);
    return true;
}
4905 
float16_squash_input_denormal(float16 a,float_status * status)4906 float16 float16_squash_input_denormal(float16 a, float_status *status)
4907 {
4908     if (status->flush_inputs_to_zero) {
4909         FloatParts64 p;
4910 
4911         float16_unpack_raw(&p, a);
4912         if (parts_squash_denormal(p, status)) {
4913             return float16_set_sign(float16_zero, p.sign);
4914         }
4915     }
4916     return a;
4917 }
4918 
float32_squash_input_denormal(float32 a,float_status * status)4919 float32 float32_squash_input_denormal(float32 a, float_status *status)
4920 {
4921     if (status->flush_inputs_to_zero) {
4922         FloatParts64 p;
4923 
4924         float32_unpack_raw(&p, a);
4925         if (parts_squash_denormal(p, status)) {
4926             return float32_set_sign(float32_zero, p.sign);
4927         }
4928     }
4929     return a;
4930 }
4931 
float64_squash_input_denormal(float64 a,float_status * status)4932 float64 float64_squash_input_denormal(float64 a, float_status *status)
4933 {
4934     if (status->flush_inputs_to_zero) {
4935         FloatParts64 p;
4936 
4937         float64_unpack_raw(&p, a);
4938         if (parts_squash_denormal(p, status)) {
4939             return float64_set_sign(float64_zero, p.sign);
4940         }
4941     }
4942     return a;
4943 }
4944 
bfloat16_squash_input_denormal(bfloat16 a,float_status * status)4945 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4946 {
4947     if (status->flush_inputs_to_zero) {
4948         FloatParts64 p;
4949 
4950         bfloat16_unpack_raw(&p, a);
4951         if (parts_squash_denormal(p, status)) {
4952             return bfloat16_set_sign(bfloat16_zero, p.sign);
4953         }
4954     }
4955     return a;
4956 }
4957 
4958 /*----------------------------------------------------------------------------
4959 | Normalizes the subnormal extended double-precision floating-point value
4960 | represented by the denormalized significand `aSig'.  The normalized exponent
4961 | and significand are stored at the locations pointed to by `zExpPtr' and
4962 | `zSigPtr', respectively.
4963 *----------------------------------------------------------------------------*/
4964 
/*
 * Normalize a subnormal floatx80 significand: shift out the leading
 * zeros and report the correspondingly adjusted exponent.
 */
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shift = clz64(aSig);

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4974 
4975 /*----------------------------------------------------------------------------
4976 | Takes two extended double-precision floating-point values `a' and `b', one
4977 | of which is a NaN, and returns the appropriate NaN result.  If either `a' or
4978 | `b' is a signaling NaN, the invalid exception is raised.
4979 *----------------------------------------------------------------------------*/
4980 
/*
 * Given two floatx80 operands, at least one a NaN, return the
 * appropriate NaN result via parts_pick_nan.  Operands that fail to
 * unpack canonically yield the default NaN.
 */
floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status)
{
    FloatParts128 parts_a, parts_b, *res;

    if (!floatx80_unpack_canonical(&parts_a, a, status) ||
        !floatx80_unpack_canonical(&parts_b, b, status)) {
        return floatx80_default_nan(status);
    }

    res = parts_pick_nan(&parts_a, &parts_b, status);
    return floatx80_round_pack_canonical(res, status);
}
4993 
4994 /*----------------------------------------------------------------------------
4995 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4996 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4997 | and returns the proper extended double-precision floating-point value
4998 | corresponding to the abstract input.  Ordinarily, the abstract value is
4999 | rounded and packed into the extended double-precision format, with the
5000 | inexact exception raised if the abstract input cannot be represented
5001 | exactly.  However, if the abstract value is too large, the overflow and
5002 | inexact exceptions are raised and an infinity or maximal finite value is
5003 | returned.  If the abstract value is too small, the input value is rounded to
5004 | a subnormal number, and the underflow and inexact exceptions are raised if
5005 | the abstract input cannot be represented exactly as a subnormal extended
5006 | double-precision floating-point number.
5007 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
5008 | the result is rounded to the same number of bits as single or double
5009 | precision, respectively.  Otherwise, the result is rounded to the full
5010 | precision of the extended double-precision format.
5011 |     The input significand must be normalized or smaller.  If the input
5012 | significand is not normalized, `zExp' must be 0; in that case, the result
5013 | returned is a subnormal number, and it must not require rounding.  The
5014 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
5015 | Floating-Point Arithmetic.
5016 *----------------------------------------------------------------------------*/
5017 
floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    FloatRoundMode roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Choose the rounding point inside the 64-bit integer significand.
     * For reduced (single/double) precision, 'roundIncrement' is half the
     * value of the lowest kept bit and 'roundMask' covers all the bits
     * that will be discarded.  Full 80-bit precision rounds on the zSig1
     * extension instead and is handled separately below.
     */
    switch (roundingPrecision) {
    case floatx80_precision_x:
        goto precision80;
    case floatx80_precision_d:
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
        break;
    case floatx80_precision_s:
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
        break;
    default:
        g_assert_not_reached();
    }
    /*
     * Fold any nonzero extension bits into bit 0 of zSig0 as a sticky
     * bit; bit 0 is well below the reduced-precision rounding point, so
     * this only affects inexactness and tie detection.
     */
    zSig0 |= ( zSig1 != 0 );
    /*
     * For directed roundings, convert the mode into an additive
     * increment: 0 to truncate, roundMask to round away from zero in
     * the chosen direction.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    /* Single unsigned compare catches both zExp <= 0 and zExp >= 0x7FFE. */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        /*
         * Overflow if the exponent is already too large, or if it is at
         * the maximum and the rounding increment carries out of the
         * significand (detected by unsigned wraparound).
         */
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            /* Subnormal (or would-be subnormal) result. */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal_flushed, status);
                return packFloatx80(zSign, 0, 0);
            }
            /*
             * Tininess before rounding: always tiny here.  Tininess
             * after rounding: tiny unless rounding at full exponent
             * range would have carried the value back up to the
             * smallest normal (the zSig0 + roundIncrement overflow
             * test, again via unsigned wraparound).
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            /* Denormalize, jamming shifted-out bits into the sticky bit. */
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* Rounding may carry into the integer bit: smallest normal. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            /* For nearest-even on an exact tie, also clear the kept LSB. */
            roundIncrement = roundMask + 1;
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    /* Normal-range reduced-precision rounding. */
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    /* Carry out of the significand: renormalize to 1.0 * 2^(zExp+1). */
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    /* For nearest-even on an exact tie, also clear the kept LSB. */
    roundIncrement = roundMask + 1;
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /*
     * Full 80-bit precision: zSig1 holds the bits below the rounding
     * point.  'increment' says whether to bump zSig0 by one ulp.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Round up when the guard bit (MSB of zSig1) is set. */
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        /*
         * Overflow if beyond the maximum exponent, or at the maximum
         * with an all-ones significand about to be incremented.
         */
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /*
             * Directed roundings toward zero saturate at the largest
             * finite value; otherwise return infinity.
             */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return floatx80_default_inf(zSign, status);
        }
        if ( zExp <= 0 ) {
            /*
             * Subnormal result.  Tininess after rounding: tiny unless
             * the increment would have promoted an all-ones significand
             * to the smallest normal.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            /* Denormalize 128-bit significand, jamming lost bits. */
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* Recompute the rounding decision on the shifted value. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /* Exact tie (only the guard bit set): round to even. */
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                /* Carry into the integer bit: smallest normal number. */
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            /* Carry out of the significand: renormalize upward. */
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            /* Exact tie (only the guard bit set): round to even. */
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
5205 
5206 /*----------------------------------------------------------------------------
5207 | Takes an abstract floating-point value having sign `zSign', exponent
5208 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5209 | and returns the proper extended double-precision floating-point value
5210 | corresponding to the abstract input.  This routine is just like
5211 | `roundAndPackFloatx80' except that the input significand does not have to be
5212 | normalized.
5213 *----------------------------------------------------------------------------*/
5214 
normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,bool zSign,int32_t zExp,uint64_t zSig0,uint64_t zSig1,float_status * status)5215 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5216                                        bool zSign, int32_t zExp,
5217                                        uint64_t zSig0, uint64_t zSig1,
5218                                        float_status *status)
5219 {
5220     int8_t shiftCount;
5221 
5222     if ( zSig0 == 0 ) {
5223         zSig0 = zSig1;
5224         zSig1 = 0;
5225         zExp -= 64;
5226     }
5227     shiftCount = clz64(zSig0);
5228     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5229     zExp -= shiftCount;
5230     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5231                                 zSig0, zSig1, status);
5232 
5233 }
5234 
5235 /*----------------------------------------------------------------------------
5236 | Returns the binary exponential of the single-precision floating-point value
5237 | `a'. The operation is performed according to the IEC/IEEE Standard for
5238 | Binary Floating-Point Arithmetic.
5239 |
5240 | Uses the following identities:
5241 |
5242 | 1. -------------------------------------------------------------------------
5243 |      x    x*ln(2)
5244 |     2  = e
5245 |
5246 | 2. -------------------------------------------------------------------------
5247 |                      2     3     4     5           n
5248 |      x        x     x     x     x     x           x
5249 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5250 |               1!    2!    3!    4!    5!          n!
5251 *----------------------------------------------------------------------------*/
5252 
/* Taylor-series coefficients 1/n! for e**x, as double-precision constants. */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1: 1/1!  */
    const_float64( 0x3fe0000000000000ll ), /*  2: 1/2!  */
    const_float64( 0x3fc5555555555555ll ), /*  3: 1/3!  */
    const_float64( 0x3fa5555555555555ll ), /*  4: 1/4!  */
    const_float64( 0x3f81111111111111ll ), /*  5: 1/5!  */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6: 1/6!  */
    const_float64( 0x3f2a01a01a01a01all ), /*  7: 1/7!  */
    const_float64( 0x3efa01a01a01a01all ), /*  8: 1/8!  */
    const_float64( 0x3ec71de3a556c734ll ), /*  9: 1/9!  */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10: 1/10! */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11: 1/11! */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12: 1/12! */
    const_float64( 0x3de6124613a86d09ll ), /* 13: 1/13! */
    const_float64( 0x3da93974a8c07c9dll ), /* 14: 1/14! */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15: 1/15! */
};
5271 
float32 float32_exp2(float32 a, float_status *status)
{
    FloatParts64 xp, xnp, tp, rp;
    int i;

    float32_unpack_canonical(&xp, a, status);
    if (unlikely(xp.cls != float_class_normal)) {
        switch (xp.cls) {
        case float_class_denormal:
            /* Denormal inputs go through the full computation below. */
            break;
        case float_class_snan:
        case float_class_qnan:
            /* NaN in, NaN out (signaling NaNs are quieted). */
            parts_return_nan(&xp, status);
            return float32_round_pack_canonical(&xp, status);
        case float_class_inf:
            /* 2**-inf = +0, 2**+inf = +inf. */
            return xp.sign ? float32_zero : a;
        case float_class_zero:
            /* 2**0 = 1, exactly. */
            return float32_one;
        default:
            g_assert_not_reached();
        }
    }

    /* Result of the series below is never exact for a normal input. */
    float_raise(float_flag_inexact, status);

    /* Identity 1 above: 2**a = e**(a * ln 2); compute x = a * ln 2. */
    float64_unpack_canonical(&tp, float64_ln2, status);
    xp = *parts_mul(&xp, &tp, status);
    xnp = xp;

    /*
     * Identity 2 above: sum the Taylor series for e**x in double
     * precision.  rp accumulates the partial sum starting at 1; xnp
     * tracks the running power x**(i+1).
     */
    float64_unpack_canonical(&rp, float64_one, status);
    for (i = 0 ; i < 15 ; i++) {
        /* rp += x**(i+1) / (i+1)!  */
        float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
        rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status);
        xnp = *parts_mul(&xnp, &xp, status);
    }

    /* Round the double-precision accumulator down to single precision. */
    return float32_round_pack_canonical(&rp, status);
}
5311 
5312 /*----------------------------------------------------------------------------
5313 | Rounds the extended double-precision floating-point value `a'
5314 | to the precision provided by floatx80_rounding_precision and returns the
5315 | result as an extended double-precision floating-point value.
5316 | The operation is performed according to the IEC/IEEE Standard for Binary
5317 | Floating-Point Arithmetic.
5318 *----------------------------------------------------------------------------*/
5319 
floatx80 floatx80_round(floatx80 a, float_status *status)
{
    FloatParts128 parts;

    /* Invalid encodings unpack as failure and yield the default NaN. */
    if (floatx80_unpack_canonical(&parts, a, status)) {
        return floatx80_round_pack_canonical(&parts, status);
    }
    return floatx80_default_nan(status);
}
5329 
softfloat_init(void)5330 static void __attribute__((constructor)) softfloat_init(void)
5331 {
5332     union_float64 ua, ub, uc, ur;
5333 
5334     if (QEMU_NO_HARDFLOAT) {
5335         return;
5336     }
5337     /*
5338      * Test that the host's FMA is not obviously broken. For example,
5339      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5340      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5341      */
5342     ua.s = 0x0020000000000001ULL;
5343     ub.s = 0x3ca0000000000000ULL;
5344     uc.s = 0x0020000000000000ULL;
5345     ur.h = fma(ua.h, ub.h, uc.h);
5346     if (ur.s != 0x0020000000000001ULL) {
5347         force_soft_fma = true;
5348     }
5349 }
5350