xref: /qemu/fpu/softfloat.c (revision 9b6e986e280f43e3df3baf7aae2069d599b5056c)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107  * and trapping on every FP exception is not fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that given that exception
120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
/*
 * Generate a helper that squashes a denormal input to a same-signed zero
 * and raises float_flag_input_denormal_flushed.  "__nocheck" means the
 * caller has already verified that s->flush_inputs_to_zero is set.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal_flushed, s);          \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
142 
/*
 * One-operand flush helper: checks flush_inputs_to_zero itself, then
 * defers to the corresponding __nocheck helper.
 */
#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1
155 
/* Two-operand variant of the flush helper above. */
#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2
169 
/* Three-operand variant of the flush helper above (used for muladd). */
#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
184 
185 /*
186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
187  * hardfloat functions. Each combination of number of inputs and float size
188  * gets its own value.
189  */
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
197 #else
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
204 #endif
205 
206 /*
207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208  * float{32,64}_is_infinity when !USE_FP.
209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211  */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF   1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF   0
216 #endif
217 
218 /*
219  * Some targets clear the FP flags before most FP operations. This prevents
220  * the use of hardfloat, since hardfloat relies on the inexact flag being
221  * already set.
222  */
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226     IEEE implementation
227 # endif
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230 #else
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233 #endif
234 
235 static inline bool can_use_fpu(const float_status *s)
236 {
237     if (QEMU_NO_HARDFLOAT) {
238         return false;
239     }
240     return likely(s->float_exception_flags & float_flag_inexact &&
241                   s->float_rounding_mode == float_round_nearest_even);
242 }
243 
244 /*
245  * Hardfloat generation functions. Each operation can have two flavors:
246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247  * most condition checks, or native ones (e.g. fpclassify).
248  *
249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
250  * compiler to propagate constants and inline everything into the callers.
251  *
252  * We only generate functions for operations with two inputs, since only
253  * these are common enough to justify consolidating them into common code.
254  */
255 
/* Dual view of one value: "s" is the softfloat type, "h" the host type. */
typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

/* Two-operand predicates, used as the pre/post checks in float*_gen2. */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Soft (flag-accurate) and hard (host FPU) two-operand implementations. */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);
273 
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
278         /*
279          * Not using a temp variable for consecutive fpclassify calls ends up
280          * generating faster code.
281          */
282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284     }
285     return float32_is_zero_or_normal(a.s) &&
286            float32_is_zero_or_normal(b.s);
287 }
288 
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294     }
295     return float64_is_zero_or_normal(a.s) &&
296            float64_is_zero_or_normal(b.s);
297 }
298 
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307     }
308     return float32_is_zero_or_normal(a.s) &&
309            float32_is_zero_or_normal(b.s) &&
310            float32_is_zero_or_normal(c.s);
311 }
312 
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320     }
321     return float64_is_zero_or_normal(a.s) &&
322            float64_is_zero_or_normal(b.s) &&
323            float64_is_zero_or_normal(c.s);
324 }
325 
326 static inline bool f32_is_inf(union_float32 a)
327 {
328     if (QEMU_HARDFLOAT_USE_ISINF) {
329         return isinf(a.h);
330     }
331     return float32_is_infinity(a.s);
332 }
333 
334 static inline bool f64_is_inf(union_float64 a)
335 {
336     if (QEMU_HARDFLOAT_USE_ISINF) {
337         return isinf(a.h);
338     }
339     return float64_is_infinity(a.s);
340 }
341 
/*
 * Generic two-operand float32 wrapper: compute with the host FPU when
 * it is safe to do so, otherwise fall back to the softfloat "soft" fn.
 * "pre" and "post" are caller-supplied predicates that gate hardfloat
 * use before and after the host operation respectively.
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Flush denormal inputs first so both paths see the same operands. */
    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        /* "pre" admitted the inputs, so an infinite result is overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        /*
         * Result is at or below the smallest normal and "post" cannot
         * rule out trouble (e.g. underflow flags); redo in softfloat.
         */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
372 
/*
 * Generic two-operand float64 wrapper; see float32_gen2 for the scheme.
 * Uses the host FPU when safe, otherwise defers to the softfloat "soft"
 * implementation to get exact flag behavior.
 */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Flush denormal inputs first so both paths see the same operands. */
    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        /* "pre" admitted the inputs, so an infinite result is overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        /*
         * Result is at or below the smallest normal and "post" cannot
         * rule out trouble (e.g. underflow flags); redo in softfloat.
         */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
403 
404 /*
405  * Classify a floating point number. Everything above float_class_qnan
406  * is a NaN so cls >= float_class_qnan is any NaN.
407  *
408  * Note that we canonicalize denormals, so most code should treat
409  * class_normal and class_denormal identically.
410  */
411 
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,   /* value not yet examined */
    float_class_zero,
    float_class_normal,
    float_class_denormal, /* input was a non-squashed denormal */
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
421 
/* Turn a FloatClass value into a single-bit mask for set-membership tests. */
#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_denormal = float_cmask(float_class_denormal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    /* Common combinations. */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
    float_cmask_anynorm = float_cmask_normal | float_cmask_denormal,
};
436 
/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 8,
};
451 
452 /* Simple helpers for checking if, or what kind of, NaN we have */
/* True for either NaN kind; relies on qnan/snan being the top enumerators. */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}
457 
/* True only for a signaling NaN. */
static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}
462 
/* True only for a quiet NaN. */
static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}
467 
468 /*
469  * Return true if the float_cmask has only normals in it
470  * (including input denormals that were canonicalized)
471  */
472 static inline bool cmask_is_only_normals(int cmask)
473 {
474     return !(cmask & ~float_cmask_anynorm);
475 }
476 
477 static inline bool is_anynorm(FloatClass c)
478 {
479     return float_cmask(c) & float_cmask_anynorm;
480 }
481 
482 /*
483  * Structure holding all of the decomposed parts of a float.
484  * The exponent is unbiased and the fraction is normalized.
485  *
486  * The fraction words are stored in big-endian word ordering,
487  * so that truncation from a larger format to a smaller format
488  * can be done simply by ignoring subsequent elements.
489  */
490 
/* Decomposed value with a 64-bit significand. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;
507 
/* Decomposed value with a 128-bit significand (frac_hi:frac_lo). */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;
515 
/* Decomposed value with a 256-bit significand, most significant word first. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;
525 
526 /* These apply to the most significant word of each FloatPartsN. */
527 #define DECOMPOSED_BINARY_POINT    63
528 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
529 
530 /* Structure holding all of the relevant parameters for a format.
531  *   exp_size: the size of the exponent field
532  *   exp_bias: the offset applied to the exponent field
533  *   exp_max: the maximum normalised exponent
534  *   frac_size: the size of the fraction field
535  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
536  * The following are computed based the size of fraction
537  *   round_mask: bits below lsb which must be rounded
538  * The following optional modifiers are available:
539  *   arm_althp: handle ARM Alternative Half Precision
540  *   m68k_denormal: explicit integer bit for extended precision may be 1
541  */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_re_bias;   /* see FLOAT_PARAMS_ below for how this is derived */
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    bool m68k_denormal;
    uint64_t round_mask;
} FloatFmt;
553 
/* Expand fields based on the size of exponent and fraction */
/*
 * FLOAT_PARAMS_ fills the exponent-derived fields only; FLOAT_PARAMS
 * additionally derives frac_shift/round_mask so that the fraction is
 * left-aligned against DECOMPOSED_BINARY_POINT.
 */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_re_bias    = (1 << (E - 1)) + (1 << (E - 2)),  \
    .exp_max        = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1
566 
/* IEEE binary16 */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* binary16 with ARM Alternative Half Precision semantics */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* bfloat16: binary32 exponent range with a 7-bit fraction */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

/* IEEE binary32 */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* IEEE binary64 */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

/* IEEE binary128 */
static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

/*
 * x87 80-bit format, one entry per rounding precision; at full (64-bit)
 * precision there is nothing below the lsb to round away.
 */
#define FLOATX80_PARAMS(R)              \
    FLOAT_PARAMS_(15),                  \
    .frac_size = R == 64 ? 63 : R,      \
    .frac_shift = 0,                    \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = {
        FLOATX80_PARAMS(64),
#ifdef TARGET_M68K
        .m68k_denormal = true,
#endif
    },
};
608 
/* Unpack a float to parts, but do not canonicalize.  */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    /* Field layout, msb to lsb: sign | exponent | fraction. */
    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}
622 
/* Per-format thin wrappers binding each storage format to unpack_raw64. */
static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}
642 
static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
{
    /*
     * f.high holds sign+exponent; the whole 64-bit significand (which in
     * this format carries an explicit integer bit) lands in frac_hi.
     */
    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract32(f.high, 15, 1),
        .exp = extract32(f.high, 0, 15),
        .frac_hi = f.low
    };
}
652 
static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
{
    /* Only frac_size - 64 fraction bits live in the high word. */
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}
666 
/* Pack a float from parts, but do not canonicalize.  */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    /* Reassemble, msb to lsb: sign | exponent | fraction. */
    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}
679 
/* Per-format thin wrappers binding each storage format to pack_raw64. */
static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}
699 
static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
{
    /* Only frac_size - 64 fraction bits live in the high word. */
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}
711 
712 /*----------------------------------------------------------------------------
713 | Functions and definitions to determine:  (1) whether tininess for underflow
714 | is detected before or after rounding by default, (2) what (if anything)
715 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
716 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
717 | are propagated from function inputs to output.  These details are target-
718 | specific.
719 *----------------------------------------------------------------------------*/
720 #include "softfloat-specialize.c.inc"
721 
/*
 * Dispatch macros: C11 _Generic selects the parts64/parts128 (and, for
 * the _256 variant, parts256) implementation matching the pointer type
 * of the first argument, so one spelling serves every width.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME, \
                  FloatParts256 *: parts256_##NAME)

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

/* Forward declarations for the per-width workers defined later. */
static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
                                           FloatParts64 *c, int scale,
                                           int flags, float_status *s);
static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
                                             FloatParts128 *c, int scale,
                                             int flags, float_status *s);

#define parts_muladd_scalbn(A, B, C, Z, Y, S) \
    PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
                                    uint64_t *mod_quot, float_status *s);
static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
                                      uint64_t *mod_quot, float_status *s);

#define parts_modrem(A, B, Q, S) \
    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)

static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);
848 
849 #define parts_round_to_int_normal(A, R, C, F) \
850     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
851 
852 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
853                                  int scale, float_status *s,
854                                  const FloatFmt *fmt);
855 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
856                                   int scale, float_status *s,
857                                   const FloatFmt *fmt);
858 
859 #define parts_round_to_int(A, R, C, S, F) \
860     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
861 
862 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
863                                      int scale, int64_t min, int64_t max,
864                                      float_status *s);
865 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
866                                      int scale, int64_t min, int64_t max,
867                                      float_status *s);
868 
869 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
870     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
871 
872 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
873                                       int scale, uint64_t max,
874                                       float_status *s);
875 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
876                                        int scale, uint64_t max,
877                                        float_status *s);
878 
879 #define parts_float_to_uint(P, R, Z, M, S) \
880     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
881 
882 static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
883                                             FloatRoundMode rmode,
884                                             int bitsm1, float_status *s);
885 static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
886                                              FloatRoundMode rmode,
887                                              int bitsm1, float_status *s);
888 
889 #define parts_float_to_sint_modulo(P, R, M, S) \
890     PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)
891 
892 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
893                                   int scale, float_status *s);
894 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
895                                    int scale, float_status *s);
896 
897 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
898     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
899 
900 #define parts_sint_to_float(P, I, Z, S) \
901     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
902 
903 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
904                                   int scale, float_status *s);
905 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
906                                    int scale, float_status *s);
907 
908 #define parts_uint_to_float(P, I, Z, S) \
909     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
910 
911 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
912                                     float_status *s, int flags);
913 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
914                                       float_status *s, int flags);
915 
916 #define parts_minmax(A, B, S, F) \
917     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
918 
919 static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
920                                      float_status *s, bool q);
921 static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
922                                       float_status *s, bool q);
923 
924 #define parts_compare(A, B, S, Q) \
925     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
926 
927 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
928 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
929 
930 #define parts_scalbn(A, N, S) \
931     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
932 
933 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
934 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
935 
936 #define parts_log2(A, S, F) \
937     PARTS_GENERIC_64_128(log2, A)(A, S, F)
938 
939 /*
940  * Helper functions for softfloat-parts.c.inc, per-size operations.
941  */
942 
943 #define FRAC_GENERIC_64_128(NAME, P) \
944     _Generic((P), FloatParts64 *: frac64_##NAME, \
945                   FloatParts128 *: frac128_##NAME)
946 
947 #define FRAC_GENERIC_64_128_256(NAME, P) \
948     _Generic((P), FloatParts64 *: frac64_##NAME, \
949                   FloatParts128 *: frac128_##NAME, \
950                   FloatParts256 *: frac256_##NAME)
951 
/*
 * Add the fractions of A and B into R; returns true if the addition
 * carried out of the most significant word.
 */
static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    /* Sum from least to most significant word, rippling the carry. */
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    /* Sum from least to most significant word, rippling the carry. */
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
976 
/* Add the constant C to the fraction of A; returns true on carry-out. */
static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    /* The carry-out of the low word becomes the addend for the high word. */
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
989 
990 static void frac64_allones(FloatParts64 *a)
991 {
992     a->frac = -1;
993 }
994 
995 static void frac128_allones(FloatParts128 *a)
996 {
997     a->frac_hi = a->frac_lo = -1;
998 }
999 
1000 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
1001 
1002 static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
1003 {
1004     return (a->frac == b->frac ? float_relation_equal
1005             : a->frac < b->frac ? float_relation_less
1006             : float_relation_greater);
1007 }
1008 
1009 static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
1010 {
1011     uint64_t ta = a->frac_hi, tb = b->frac_hi;
1012     if (ta == tb) {
1013         ta = a->frac_lo, tb = b->frac_lo;
1014         if (ta == tb) {
1015             return float_relation_equal;
1016         }
1017     }
1018     return ta < tb ? float_relation_less : float_relation_greater;
1019 }
1020 
1021 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
1022 
/* Reset the fraction of A to zero. */
static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
1034 
/*
 * Divide the fraction of A by that of B, leaving the quotient in
 * A.frac with the lsb set as a sticky bit when the division was
 * inexact.  Returns true when the caller must decrement the
 * exponent (i.e. A.frac was less than B.frac on entry).
 */
static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}
1066 
/*
 * 128-bit counterpart of frac64_div: divide A.frac by B.frac, leaving
 * the quotient in A with a sticky lsb when inexact.  Returns true when
 * the caller must decrement the exponent (A.frac < B.frac on entry).
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        /* A >= B: pre-shift A right so the quotient fits in 128 bits. */
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1115 
1116 static bool frac64_eqz(FloatParts64 *a)
1117 {
1118     return a->frac == 0;
1119 }
1120 
1121 static bool frac128_eqz(FloatParts128 *a)
1122 {
1123     return (a->frac_hi | a->frac_lo) == 0;
1124 }
1125 
1126 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1127 
/*
 * Widening multiply: compute the double-width product of the
 * fractions of A and B into R.
 */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1140 
/* Negate the fraction of A in two's complement. */
static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    /* Compute 0 - frac, rippling the borrow from low to high word. */
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    /* Compute 0 - frac, rippling the borrow from low to high word. */
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1163 
1164 static int frac64_normalize(FloatParts64 *a)
1165 {
1166     if (a->frac) {
1167         int shift = clz64(a->frac);
1168         a->frac <<= shift;
1169         return shift;
1170     }
1171     return 64;
1172 }
1173 
1174 static int frac128_normalize(FloatParts128 *a)
1175 {
1176     if (a->frac_hi) {
1177         int shl = clz64(a->frac_hi);
1178         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1179         a->frac_lo <<= shl;
1180         return shl;
1181     } else if (a->frac_lo) {
1182         int shl = clz64(a->frac_lo);
1183         a->frac_hi = a->frac_lo << shl;
1184         a->frac_lo = 0;
1185         return shl + 64;
1186     }
1187     return 128;
1188 }
1189 
/*
 * Shift the 256-bit fraction left until its msb is set, returning the
 * number of bits shifted, or 256 if the fraction is zero.
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        /* Shift whole words first so at most a 0..63 bit shift remains. */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1238 
/*
 * Reduce A modulo B on 64-bit fractions.  With MOD_QUOT non-NULL,
 * leave the truncated modulus in A and store the low bits of the
 * integer quotient in *MOD_QUOT; with MOD_QUOT NULL, compute the
 * round-to-nearest-even remainder, flipping A's sign when the residue
 * is replaced by its complement relative to B.
 */
static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac;
    a1 = 0;

    /* If |A| < |B| / 2, then A is already the final result. */
    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        /* Represent A with B's exponent by shifting the fraction right. */
        a0 >>= 1;
        exp_diff = 0;
    }

    b0 = b->frac;
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    /*
     * Produce 62 quotient bits per iteration.  The division estimate is
     * biased low by 2 so the partial remainder stays non-negative; the
     * slack is recovered by later iterations and the final fixup.
     */
    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    /* Produce the final exp_diff quotient bits, correcting the estimate. */
    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        /* No quotient bits remain; T = B aligned with the residue. */
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        /*
         * Round to nearest even: if the complement B - R is smaller,
         * or equal with an odd quotient, use it and flip the sign.
         */
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    /* Normalize the residue; leftover low bits become a sticky lsb. */
    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac = a0 | (a1 != 0);
}
1321 
/*
 * Reduce A modulo B on 128-bit fractions.  With MOD_QUOT non-NULL,
 * leave the truncated modulus in A and store the low 64 bits of the
 * integer quotient in *MOD_QUOT; with MOD_QUOT NULL, compute the
 * round-to-nearest-even remainder, flipping A's sign when the residue
 * is replaced by its complement relative to B.
 */
static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    /* If |A| < |B| / 2, then A is already the final result. */
    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        /* Represent A with B's exponent by shifting the fraction right. */
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    /*
     * Produce 61 quotient bits per iteration.  The division estimate is
     * biased low by 4 so the partial remainder stays non-negative; the
     * slack is recovered by later iterations and the final fixup.
     */
    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    /* Produce the final exp_diff quotient bits, correcting the estimate. */
    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        /* No quotient bits remain; T = B aligned with the residue. */
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        /*
         * Round to nearest even: if the complement B - R is smaller,
         * or equal with an odd quotient, use it and flip the sign.
         */
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    /* Normalize the residue; leftover low bits become a sticky lsb. */
    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    a->frac_lo = a1 | (a2 != 0);
}

#define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1418 
/*
 * Shift the fraction of A left by C bits.  For the 64-bit form,
 * C must be in [0, 63]; a larger shift is undefined behaviour in C.
 */
static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    /* A shift of 64..127 moves the low word into the high word. */
    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1443 
1444 static void frac64_shr(FloatParts64 *a, int c)
1445 {
1446     a->frac >>= c;
1447 }
1448 
1449 static void frac128_shr(FloatParts128 *a, int c)
1450 {
1451     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1452 
1453     if (c & 64) {
1454         a1 = a0, a0 = 0;
1455     }
1456 
1457     c &= 63;
1458     if (c) {
1459         a1 = shr_double(a0, a1, c);
1460         a0 = a0 >> c;
1461     }
1462 
1463     a->frac_hi = a0;
1464     a->frac_lo = a1;
1465 }
1466 
1467 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1468 
/*
 * Shift the fraction of A right by C with "jamming": if any nonzero
 * bits are shifted out, the lsb of the result is set so that rounding
 * can still detect inexactness.
 */
static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            /* shr_double(a0, 0, c) recovers the low C bits shifted out. */
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            /* Everything is shifted out; only the sticky bit survives. */
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}
1482 
/* 128-bit counterpart of frac64_shrjam: shift right with sticky lsb. */
static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        /* Shift by one whole word; its bits all become sticky. */
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Shifted out entirely; only the sticky bit can survive. */
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    /* shr_double(a1, 0, c) recovers the low C bits shifted out. */
    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}
1514 
/* 256-bit counterpart of frac64_shrjam: shift right with sticky lsb. */
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        /* Shift by whole words first; their bits all become sticky. */
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Shifted out entirely; only the sticky bit can survive. */
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    /* shr_double(a3, 0, c) recovers the low C bits shifted out. */
    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1558 
/*
 * Subtract the fraction of B from that of A into R; returns true if
 * the subtraction borrowed (i.e. A.frac < B.frac).
 */
static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    /* Subtract from least to most significant word, rippling the borrow. */
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    /* Subtract from least to most significant word, rippling the borrow. */
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1583 
/*
 * Narrow a double-width fraction into R, OR-ing any nonzero discarded
 * low bits into the lsb of the result (sticky "jam" for rounding).
 */
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1596 
/* Widen a fraction to double width, zero-filling the new low bits. */
static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1612 
1613 /*
1614  * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
1615  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1616  * and thus MIT licenced.
1617  */
1618 static const uint16_t rsqrt_tab[128] = {
1619     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1620     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1621     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1622     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1623     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1624     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1625     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1626     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1627     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1628     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1629     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1630     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1631     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1632     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1633     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1634     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1635 };
1636 
1637 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1638 #define FloatPartsN    glue(FloatParts,N)
1639 #define FloatPartsW    glue(FloatParts,W)
1640 
1641 #define N 64
1642 #define W 128
1643 
1644 #include "softfloat-parts-addsub.c.inc"
1645 #include "softfloat-parts.c.inc"
1646 
1647 #undef  N
1648 #undef  W
1649 #define N 128
1650 #define W 256
1651 
1652 #include "softfloat-parts-addsub.c.inc"
1653 #include "softfloat-parts.c.inc"
1654 
1655 #undef  N
1656 #undef  W
1657 #define N            256
1658 
1659 #include "softfloat-parts-addsub.c.inc"
1660 
1661 #undef  N
1662 #undef  W
1663 #undef  partsN
1664 #undef  FloatPartsN
1665 #undef  FloatPartsW
1666 
1667 /*
1668  * Pack/unpack routines with a specific FloatFmt.
1669  */
1670 
/*
 * Unpack half-precision F into *P and canonicalize, using the
 * explicitly supplied format description PARAMS.
 */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}

/* As above, with the standard float16 format. */
static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}

/* Unpack bfloat16 F into *P and canonicalize. */
static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}

/*
 * Round *P according to the format description PARAMS and pack the
 * result as a half-precision value.
 */
static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}

/* As above, with the standard float16 format. */
static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

/* Round *P for bfloat16 and pack. */
static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}
1711 
/* Unpack single-precision F into *P and canonicalize. */
static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}

/* Round *P for single precision and pack. */
static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}

/* Unpack double-precision F into *P and canonicalize. */
static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}

/* Round *P for double precision and pack. */
static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}
1739 
/*
 * Round *P to single-precision range and precision, but return the
 * result encoded in double-precision format ("float64r32").
 */
static float64 float64r32_round_pack_canonical(FloatParts64 *p,
                                               float_status *s)
{
    parts_uncanon(p, s, &float32_params);

    /*
     * In parts_uncanon, we placed the fraction for float32 at the lsb.
     * We need to adjust the fraction higher so that the least N bits are
     * zero, and the fraction is adjacent to the float64 implicit bit.
     */
    switch (p->cls) {
    case float_class_normal:
    case float_class_denormal:
        if (unlikely(p->exp == 0)) {
            /*
             * The result is denormal for float32, but can be represented
             * in normalized form for float64.  Adjust, per canonicalize.
             */
            int shift = frac_normalize(p);
            p->exp = (float32_params.frac_shift -
                      float32_params.exp_bias - shift + 1 +
                      float64_params.exp_bias);
            frac_shr(p, float64_params.frac_shift);
        } else {
            /* Move the fraction into float64 position and rebias. */
            frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
            p->exp += float64_params.exp_bias - float32_params.exp_bias;
        }
        break;
    case float_class_snan:
    case float_class_qnan:
        /* Move the NaN payload into float64 position; exp is all ones. */
        frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
        p->exp = float64_params.exp_max;
        break;
    case float_class_inf:
        p->exp = float64_params.exp_max;
        break;
    case float_class_zero:
        break;
    default:
        g_assert_not_reached();
    }

    return float64_pack_raw(p);
}
1784 
/* Unpack raw float128 bits into canonical (classified, normalized) parts. */
static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}
1791 
/* Round canonical parts to float128 range/precision and pack to raw bits. */
static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
1798 
/*
 * Unpack an 80-bit extended-double into canonical parts.
 * Returns false if the encoding is invalid, after raising
 * float_flag_invalid; the caller is expected to return the default NaN.
 */
static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
                                      float_status *s)
{
    /* Ensure rounding precision is set before beginning. */
    switch (s->floatx80_rounding_precision) {
    case floatx80_precision_x:
    case floatx80_precision_d:
    case floatx80_precision_s:
        break;
    default:
        g_assert_not_reached();
    }

    if (unlikely(floatx80_invalid_encoding(f))) {
        float_raise(float_flag_invalid, s);
        return false;
    }

    floatx80_unpack_raw(p, f);

    /* exp_max encodes Inf or NaN; classify those by hand below. */
    if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
        parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
    } else {
        /* The explicit integer bit is ignored, after invalid checks. */
        p->frac_hi &= MAKE_64BIT_MASK(0, 63);
        p->cls = (p->frac_hi == 0 ? float_class_inf
                  : parts_is_snan_frac(p->frac_hi, s)
                  ? float_class_snan : float_class_qnan);
    }
    return true;
}
1831 
/*
 * Round canonical parts at the currently selected floatx80 rounding
 * precision (extended/double/single) and pack into the 80-bit format.
 */
static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
    uint64_t frac;
    int exp;

    switch (p->cls) {
    case float_class_normal:
    case float_class_denormal:
        if (s->floatx80_rounding_precision == floatx80_precision_x) {
            parts_uncanon_normal(p, s, fmt);
            frac = p->frac_hi;
            exp = p->exp;
        } else {
            /* Narrow to a 64-bit fraction and round at reduced precision. */
            FloatParts64 p64;

            p64.sign = p->sign;
            p64.exp = p->exp;
            frac_truncjam(&p64, p);
            parts_uncanon_normal(&p64, s, fmt);
            frac = p64.frac;
            exp = p64.exp;
        }
        if (exp != fmt->exp_max) {
            break;
        }
        /* rounded to inf -- fall through to set frac correctly */

    case float_class_inf:
        /* x86 and m68k differ in the setting of the integer bit. */
        frac = s->floatx80_behaviour & floatx80_default_inf_int_bit_is_zero ?
            0 : (1ULL << 63);
        exp = fmt->exp_max;
        break;

    case float_class_zero:
        frac = 0;
        exp = 0;
        break;

    case float_class_snan:
    case float_class_qnan:
        /* NaNs have the integer bit set. */
        frac = p->frac_hi | (1ull << 63);
        exp = fmt->exp_max;
        break;

    default:
        g_assert_not_reached();
    }

    return packFloatx80(p->sign, exp, frac);
}
1886 
1887 /*
1888  * Addition and subtraction
1889  */
1890 
1891 static float16 QEMU_FLATTEN
1892 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1893 {
1894     FloatParts64 pa, pb, *pr;
1895 
1896     float16_unpack_canonical(&pa, a, status);
1897     float16_unpack_canonical(&pb, b, status);
1898     pr = parts_addsub(&pa, &pb, status, subtract);
1899 
1900     return float16_round_pack_canonical(pr, status);
1901 }
1902 
/* IEEE-754 float16 addition. */
float16 float16_add(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, false);
}
1907 
/* IEEE-754 float16 subtraction. */
float16 float16_sub(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, true);
}
1912 
1913 static float32 QEMU_SOFTFLOAT_ATTR
1914 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1915 {
1916     FloatParts64 pa, pb, *pr;
1917 
1918     float32_unpack_canonical(&pa, a, status);
1919     float32_unpack_canonical(&pb, b, status);
1920     pr = parts_addsub(&pa, &pb, status, subtract);
1921 
1922     return float32_round_pack_canonical(pr, status);
1923 }
1924 
/* Softfloat float32 addition (hardfloat fallback). */
static float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, false);
}
1929 
/* Softfloat float32 subtraction (hardfloat fallback). */
static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, true);
}
1934 
1935 static float64 QEMU_SOFTFLOAT_ATTR
1936 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1937 {
1938     FloatParts64 pa, pb, *pr;
1939 
1940     float64_unpack_canonical(&pa, a, status);
1941     float64_unpack_canonical(&pb, b, status);
1942     pr = parts_addsub(&pa, &pb, status, subtract);
1943 
1944     return float64_round_pack_canonical(pr, status);
1945 }
1946 
/* Softfloat float64 addition (hardfloat fallback). */
static float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, false);
}
1951 
/* Softfloat float64 subtraction (hardfloat fallback). */
static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, true);
}
1956 
static float hard_f32_add(float a, float b)
{
    /* Host-FPU single-precision addition. */
    const float sum = a + b;
    return sum;
}
1961 
static float hard_f32_sub(float a, float b)
{
    /* Host-FPU single-precision subtraction. */
    const float diff = a - b;
    return diff;
}
1966 
static double hard_f64_add(double a, double b)
{
    /* Host-FPU double-precision addition. */
    const double sum = a + b;
    return sum;
}
1971 
static double hard_f64_sub(double a, double b)
{
    /* Host-FPU double-precision subtraction. */
    const double diff = a - b;
    return diff;
}
1976 
1977 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1978 {
1979     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1980         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1981     }
1982     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1983 }
1984 
1985 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1986 {
1987     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1988         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1989     } else {
1990         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1991     }
1992 }
1993 
/* Two-operand dispatch for float32 add/sub (hard/soft selected by gen2). */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}
2000 
/* Two-operand dispatch for float64 add/sub (hard/soft selected by gen2). */
static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}
2007 
/* IEEE-754 float32 addition. */
float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}
2013 
/* IEEE-754 float32 subtraction. */
float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}
2019 
/* IEEE-754 float64 addition. */
float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}
2025 
/* IEEE-754 float64 subtraction. */
float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
2031 
2032 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
2033                                  bool subtract)
2034 {
2035     FloatParts64 pa, pb, *pr;
2036 
2037     float64_unpack_canonical(&pa, a, status);
2038     float64_unpack_canonical(&pb, b, status);
2039     pr = parts_addsub(&pa, &pb, status, subtract);
2040 
2041     return float64r32_round_pack_canonical(pr, status);
2042 }
2043 
/* float64 addition rounded to float32 precision (result in float64). */
float64 float64r32_add(float64 a, float64 b, float_status *status)
{
    return float64r32_addsub(a, b, status, false);
}
2048 
/* float64 subtraction rounded to float32 precision (result in float64). */
float64 float64r32_sub(float64 a, float64 b, float_status *status)
{
    return float64r32_addsub(a, b, status, true);
}
2053 
2054 static bfloat16 QEMU_FLATTEN
2055 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2056 {
2057     FloatParts64 pa, pb, *pr;
2058 
2059     bfloat16_unpack_canonical(&pa, a, status);
2060     bfloat16_unpack_canonical(&pb, b, status);
2061     pr = parts_addsub(&pa, &pb, status, subtract);
2062 
2063     return bfloat16_round_pack_canonical(pr, status);
2064 }
2065 
/* bfloat16 addition. */
bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, false);
}
2070 
/* bfloat16 subtraction. */
bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, true);
}
2075 
2076 static float128 QEMU_FLATTEN
2077 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2078 {
2079     FloatParts128 pa, pb, *pr;
2080 
2081     float128_unpack_canonical(&pa, a, status);
2082     float128_unpack_canonical(&pb, b, status);
2083     pr = parts_addsub(&pa, &pb, status, subtract);
2084 
2085     return float128_round_pack_canonical(pr, status);
2086 }
2087 
/* IEEE-754 float128 addition. */
float128 float128_add(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, false);
}
2092 
/* IEEE-754 float128 subtraction. */
float128 float128_sub(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, true);
}
2097 
2098 static floatx80 QEMU_FLATTEN
2099 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2100 {
2101     FloatParts128 pa, pb, *pr;
2102 
2103     if (!floatx80_unpack_canonical(&pa, a, status) ||
2104         !floatx80_unpack_canonical(&pb, b, status)) {
2105         return floatx80_default_nan(status);
2106     }
2107 
2108     pr = parts_addsub(&pa, &pb, status, subtract);
2109     return floatx80_round_pack_canonical(pr, status);
2110 }
2111 
/* 80-bit extended-double addition. */
floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
{
    return floatx80_addsub(a, b, status, false);
}
2116 
/* 80-bit extended-double subtraction. */
floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
{
    return floatx80_addsub(a, b, status, true);
}
2121 
2122 /*
2123  * Multiplication
2124  */
2125 
2126 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2127 {
2128     FloatParts64 pa, pb, *pr;
2129 
2130     float16_unpack_canonical(&pa, a, status);
2131     float16_unpack_canonical(&pb, b, status);
2132     pr = parts_mul(&pa, &pb, status);
2133 
2134     return float16_round_pack_canonical(pr, status);
2135 }
2136 
2137 static float32 QEMU_SOFTFLOAT_ATTR
2138 soft_f32_mul(float32 a, float32 b, float_status *status)
2139 {
2140     FloatParts64 pa, pb, *pr;
2141 
2142     float32_unpack_canonical(&pa, a, status);
2143     float32_unpack_canonical(&pb, b, status);
2144     pr = parts_mul(&pa, &pb, status);
2145 
2146     return float32_round_pack_canonical(pr, status);
2147 }
2148 
2149 static float64 QEMU_SOFTFLOAT_ATTR
2150 soft_f64_mul(float64 a, float64 b, float_status *status)
2151 {
2152     FloatParts64 pa, pb, *pr;
2153 
2154     float64_unpack_canonical(&pa, a, status);
2155     float64_unpack_canonical(&pb, b, status);
2156     pr = parts_mul(&pa, &pb, status);
2157 
2158     return float64_round_pack_canonical(pr, status);
2159 }
2160 
static float hard_f32_mul(float a, float b)
{
    /* Host-FPU single-precision multiplication. */
    const float prod = a * b;
    return prod;
}
2165 
static double hard_f64_mul(double a, double b)
{
    /* Host-FPU double-precision multiplication. */
    const double prod = a * b;
    return prod;
}
2170 
/* IEEE-754 float32 multiplication (hard/soft selected by gen2). */
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}
2177 
/* IEEE-754 float64 multiplication (hard/soft selected by gen2). */
float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}
2184 
2185 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2186 {
2187     FloatParts64 pa, pb, *pr;
2188 
2189     float64_unpack_canonical(&pa, a, status);
2190     float64_unpack_canonical(&pb, b, status);
2191     pr = parts_mul(&pa, &pb, status);
2192 
2193     return float64r32_round_pack_canonical(pr, status);
2194 }
2195 
2196 bfloat16 QEMU_FLATTEN
2197 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2198 {
2199     FloatParts64 pa, pb, *pr;
2200 
2201     bfloat16_unpack_canonical(&pa, a, status);
2202     bfloat16_unpack_canonical(&pb, b, status);
2203     pr = parts_mul(&pa, &pb, status);
2204 
2205     return bfloat16_round_pack_canonical(pr, status);
2206 }
2207 
2208 float128 QEMU_FLATTEN
2209 float128_mul(float128 a, float128 b, float_status *status)
2210 {
2211     FloatParts128 pa, pb, *pr;
2212 
2213     float128_unpack_canonical(&pa, a, status);
2214     float128_unpack_canonical(&pb, b, status);
2215     pr = parts_mul(&pa, &pb, status);
2216 
2217     return float128_round_pack_canonical(pr, status);
2218 }
2219 
2220 floatx80 QEMU_FLATTEN
2221 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2222 {
2223     FloatParts128 pa, pb, *pr;
2224 
2225     if (!floatx80_unpack_canonical(&pa, a, status) ||
2226         !floatx80_unpack_canonical(&pb, b, status)) {
2227         return floatx80_default_nan(status);
2228     }
2229 
2230     pr = parts_mul(&pa, &pb, status);
2231     return floatx80_round_pack_canonical(pr, status);
2232 }
2233 
2234 /*
2235  * Fused multiply-add
2236  */
2237 
2238 float16 QEMU_FLATTEN
2239 float16_muladd_scalbn(float16 a, float16 b, float16 c,
2240                       int scale, int flags, float_status *status)
2241 {
2242     FloatParts64 pa, pb, pc, *pr;
2243 
2244     float16_unpack_canonical(&pa, a, status);
2245     float16_unpack_canonical(&pb, b, status);
2246     float16_unpack_canonical(&pc, c, status);
2247     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2248 
2249     return float16_round_pack_canonical(pr, status);
2250 }
2251 
/* Fused multiply-add for float16: (a * b) + c, no extra scaling. */
float16 float16_muladd(float16 a, float16 b, float16 c,
                       int flags, float_status *status)
{
    return float16_muladd_scalbn(a, b, c, 0, flags, status);
}
2257 
2258 float32 QEMU_SOFTFLOAT_ATTR
2259 float32_muladd_scalbn(float32 a, float32 b, float32 c,
2260                       int scale, int flags, float_status *status)
2261 {
2262     FloatParts64 pa, pb, pc, *pr;
2263 
2264     float32_unpack_canonical(&pa, a, status);
2265     float32_unpack_canonical(&pb, b, status);
2266     float32_unpack_canonical(&pc, c, status);
2267     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2268 
2269     return float32_round_pack_canonical(pr, status);
2270 }
2271 
2272 float64 QEMU_SOFTFLOAT_ATTR
2273 float64_muladd_scalbn(float64 a, float64 b, float64 c,
2274                       int scale, int flags, float_status *status)
2275 {
2276     FloatParts64 pa, pb, pc, *pr;
2277 
2278     float64_unpack_canonical(&pa, a, status);
2279     float64_unpack_canonical(&pb, b, status);
2280     float64_unpack_canonical(&pc, c, status);
2281     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2282 
2283     return float64_round_pack_canonical(pr, status);
2284 }
2285 
/*
 * When true, float32_muladd/float64_muladd below skip their hardfloat
 * fast paths and always use the softfloat implementation.
 * (Assigned outside this view -- presumably a test hook; confirm.)
 */
static bool force_soft_fma;
2287 
/*
 * Fused multiply-add for float32 with a hardfloat fast path.
 * Falls back to softfloat when the FPU cannot be used, when the
 * suppress_add_product_zero flag is requested, when any input is not
 * zero-or-normal, or when the hard result may need flag fixup.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* This flag is not handled by the fast path. */
    if (unlikely(flags & float_muladd_suppress_add_product_zero)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Tiny result: restore the inputs and redo in softfloat. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
}
2358 
/*
 * Fused multiply-add for float64 with a hardfloat fast path.
 * Falls back to softfloat when the FPU cannot be used, when any input
 * is not zero-or-normal, or when the hard result may need flag fixup.
 */
float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
            /*
             * Tiny result: restore the inputs and redo in softfloat.
             * NOTE(review): bound is FLT_MIN, not DBL_MIN, for a double
             * result -- conservative (extra soft fallbacks); confirm
             * this is intentional.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
}
2426 
2427 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2428                           int flags, float_status *status)
2429 {
2430     FloatParts64 pa, pb, pc, *pr;
2431 
2432     float64_unpack_canonical(&pa, a, status);
2433     float64_unpack_canonical(&pb, b, status);
2434     float64_unpack_canonical(&pc, c, status);
2435     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2436 
2437     return float64r32_round_pack_canonical(pr, status);
2438 }
2439 
2440 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2441                                       int flags, float_status *status)
2442 {
2443     FloatParts64 pa, pb, pc, *pr;
2444 
2445     bfloat16_unpack_canonical(&pa, a, status);
2446     bfloat16_unpack_canonical(&pb, b, status);
2447     bfloat16_unpack_canonical(&pc, c, status);
2448     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2449 
2450     return bfloat16_round_pack_canonical(pr, status);
2451 }
2452 
2453 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2454                                       int flags, float_status *status)
2455 {
2456     FloatParts128 pa, pb, pc, *pr;
2457 
2458     float128_unpack_canonical(&pa, a, status);
2459     float128_unpack_canonical(&pb, b, status);
2460     float128_unpack_canonical(&pc, c, status);
2461     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2462 
2463     return float128_round_pack_canonical(pr, status);
2464 }
2465 
2466 /*
2467  * Division
2468  */
2469 
2470 float16 float16_div(float16 a, float16 b, float_status *status)
2471 {
2472     FloatParts64 pa, pb, *pr;
2473 
2474     float16_unpack_canonical(&pa, a, status);
2475     float16_unpack_canonical(&pb, b, status);
2476     pr = parts_div(&pa, &pb, status);
2477 
2478     return float16_round_pack_canonical(pr, status);
2479 }
2480 
2481 static float32 QEMU_SOFTFLOAT_ATTR
2482 soft_f32_div(float32 a, float32 b, float_status *status)
2483 {
2484     FloatParts64 pa, pb, *pr;
2485 
2486     float32_unpack_canonical(&pa, a, status);
2487     float32_unpack_canonical(&pb, b, status);
2488     pr = parts_div(&pa, &pb, status);
2489 
2490     return float32_round_pack_canonical(pr, status);
2491 }
2492 
2493 static float64 QEMU_SOFTFLOAT_ATTR
2494 soft_f64_div(float64 a, float64 b, float_status *status)
2495 {
2496     FloatParts64 pa, pb, *pr;
2497 
2498     float64_unpack_canonical(&pa, a, status);
2499     float64_unpack_canonical(&pb, b, status);
2500     pr = parts_div(&pa, &pb, status);
2501 
2502     return float64_round_pack_canonical(pr, status);
2503 }
2504 
static float hard_f32_div(float a, float b)
{
    /* Host-FPU single-precision division. */
    const float quot = a / b;
    return quot;
}
2509 
static double hard_f64_div(double a, double b)
{
    /* Host-FPU double-precision division. */
    const double quot = a / b;
    return quot;
}
2514 
2515 static bool f32_div_pre(union_float32 a, union_float32 b)
2516 {
2517     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2518         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2519                fpclassify(b.h) == FP_NORMAL;
2520     }
2521     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2522 }
2523 
2524 static bool f64_div_pre(union_float64 a, union_float64 b)
2525 {
2526     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2527         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2528                fpclassify(b.h) == FP_NORMAL;
2529     }
2530     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2531 }
2532 
2533 static bool f32_div_post(union_float32 a, union_float32 b)
2534 {
2535     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2536         return fpclassify(a.h) != FP_ZERO;
2537     }
2538     return !float32_is_zero(a.s);
2539 }
2540 
2541 static bool f64_div_post(union_float64 a, union_float64 b)
2542 {
2543     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2544         return fpclassify(a.h) != FP_ZERO;
2545     }
2546     return !float64_is_zero(a.s);
2547 }
2548 
/* IEEE-754 float32 division (hard/soft selected by gen2). */
float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}
2555 
/* IEEE-754 float64 division (hard/soft selected by gen2). */
float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}
2562 
2563 float64 float64r32_div(float64 a, float64 b, float_status *status)
2564 {
2565     FloatParts64 pa, pb, *pr;
2566 
2567     float64_unpack_canonical(&pa, a, status);
2568     float64_unpack_canonical(&pb, b, status);
2569     pr = parts_div(&pa, &pb, status);
2570 
2571     return float64r32_round_pack_canonical(pr, status);
2572 }
2573 
2574 bfloat16 QEMU_FLATTEN
2575 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2576 {
2577     FloatParts64 pa, pb, *pr;
2578 
2579     bfloat16_unpack_canonical(&pa, a, status);
2580     bfloat16_unpack_canonical(&pb, b, status);
2581     pr = parts_div(&pa, &pb, status);
2582 
2583     return bfloat16_round_pack_canonical(pr, status);
2584 }
2585 
2586 float128 QEMU_FLATTEN
2587 float128_div(float128 a, float128 b, float_status *status)
2588 {
2589     FloatParts128 pa, pb, *pr;
2590 
2591     float128_unpack_canonical(&pa, a, status);
2592     float128_unpack_canonical(&pb, b, status);
2593     pr = parts_div(&pa, &pb, status);
2594 
2595     return float128_round_pack_canonical(pr, status);
2596 }
2597 
2598 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2599 {
2600     FloatParts128 pa, pb, *pr;
2601 
2602     if (!floatx80_unpack_canonical(&pa, a, status) ||
2603         !floatx80_unpack_canonical(&pb, b, status)) {
2604         return floatx80_default_nan(status);
2605     }
2606 
2607     pr = parts_div(&pa, &pb, status);
2608     return floatx80_round_pack_canonical(pr, status);
2609 }
2610 
2611 /*
2612  * Remainder
2613  */
2614 
2615 float32 float32_rem(float32 a, float32 b, float_status *status)
2616 {
2617     FloatParts64 pa, pb, *pr;
2618 
2619     float32_unpack_canonical(&pa, a, status);
2620     float32_unpack_canonical(&pb, b, status);
2621     pr = parts_modrem(&pa, &pb, NULL, status);
2622 
2623     return float32_round_pack_canonical(pr, status);
2624 }
2625 
2626 float64 float64_rem(float64 a, float64 b, float_status *status)
2627 {
2628     FloatParts64 pa, pb, *pr;
2629 
2630     float64_unpack_canonical(&pa, a, status);
2631     float64_unpack_canonical(&pb, b, status);
2632     pr = parts_modrem(&pa, &pb, NULL, status);
2633 
2634     return float64_round_pack_canonical(pr, status);
2635 }
2636 
2637 float128 float128_rem(float128 a, float128 b, float_status *status)
2638 {
2639     FloatParts128 pa, pb, *pr;
2640 
2641     float128_unpack_canonical(&pa, a, status);
2642     float128_unpack_canonical(&pb, b, status);
2643     pr = parts_modrem(&pa, &pb, NULL, status);
2644 
2645     return float128_round_pack_canonical(pr, status);
2646 }
2647 
2648 /*
2649  * Returns the remainder of the extended double-precision floating-point value
2650  * `a' with respect to the corresponding value `b'.
2651  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2652  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2653  * the remainder based on truncating the quotient toward zero instead and
2654  * *quotient is set to the low 64 bits of the absolute value of the integer
2655  * quotient.
2656  */
2657 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2658                          uint64_t *quotient, float_status *status)
2659 {
2660     FloatParts128 pa, pb, *pr;
2661 
2662     *quotient = 0;
2663     if (!floatx80_unpack_canonical(&pa, a, status) ||
2664         !floatx80_unpack_canonical(&pb, b, status)) {
2665         return floatx80_default_nan(status);
2666     }
2667     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2668 
2669     return floatx80_round_pack_canonical(pr, status);
2670 }
2671 
2672 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2673 {
2674     uint64_t quotient;
2675     return floatx80_modrem(a, b, false, &quotient, status);
2676 }
2677 
2678 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2679 {
2680     uint64_t quotient;
2681     return floatx80_modrem(a, b, true, &quotient, status);
2682 }
2683 
2684 /*
2685  * Float to Float conversions
2686  *
2687  * Returns the result of converting one float format to another. The
2688  * conversion is performed according to the IEC/IEEE Standard for
2689  * Binary Floating-Point Arithmetic.
2690  *
2691  * Usually this only needs to take care of raising invalid exceptions
2692  * and handling the conversion on NaNs.
2693  */
2694 
/*
 * Adjust canonical parts for conversion to ARM alternative half-precision
 * (AHP), a format with no Inf or NaN encodings: NaNs become zero and Inf
 * becomes the maximum normal, both raising Invalid.
 */
static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
        float_raise(float_flag_invalid_snan, s);
        /* fall through */
    case float_class_qnan:
        /*
         * There is no NaN in the destination format.  Raise Invalid
         * and return a zero with the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        break;

    case float_class_inf:
        /*
         * There is no Inf in the destination format.  Raise Invalid
         * and return the maximum normal with the correct sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        break;

    case float_class_denormal:
        float_raise(float_flag_input_denormal_used, s);
        break;
    case float_class_normal:
    case float_class_zero:
        break;

    default:
        g_assert_not_reached();
    }
}
2733 
/*
 * Same-width float-to-float fixups on 64-bit parts: propagate/silence
 * NaNs and record the use of a denormal input.
 */
static void parts64_float_to_float(FloatParts64 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
    if (a->cls == float_class_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }
}
2743 
/*
 * Same-width float-to-float fixups on 128-bit parts: propagate/silence
 * NaNs and record the use of a denormal input.
 */
static void parts128_float_to_float(FloatParts128 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
    if (a->cls == float_class_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }
}
2753 
/* Select the parts64/parts128 float_to_float variant by the type of P. */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2756 
/*
 * Narrow 128-bit parts 'b' into 64-bit parts 'a' for a narrowing
 * float-to-float conversion, truncating the fraction via frac_truncjam.
 */
static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    switch (a->cls) {
    case float_class_denormal:
        float_raise(float_flag_input_denormal_used, s);
        /* fall through */
    case float_class_normal:
        frac_truncjam(a, b);
        break;
    case float_class_snan:
    case float_class_qnan:
        /* Discard the low bits of the NaN. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
        break;
    default:
        /* Zero and Inf carry no fraction. */
        break;
    }
}
2781 
2782 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2783                                        float_status *s)
2784 {
2785     a->cls = b->cls;
2786     a->sign = b->sign;
2787     a->exp = b->exp;
2788     frac_widen(a, b);
2789 
2790     if (is_nan(a->cls)) {
2791         parts_return_nan(a, s);
2792     }
2793     if (a->cls == float_class_denormal) {
2794         float_raise(float_flag_input_denormal_used, s);
2795     }
2796 }
2797 
2798 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2799 {
2800     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2801     FloatParts64 p;
2802 
2803     float16a_unpack_canonical(&p, a, s, fmt16);
2804     parts_float_to_float(&p, s);
2805     return float32_round_pack_canonical(&p, s);
2806 }
2807 
2808 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2809 {
2810     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2811     FloatParts64 p;
2812 
2813     float16a_unpack_canonical(&p, a, s, fmt16);
2814     parts_float_to_float(&p, s);
2815     return float64_round_pack_canonical(&p, s);
2816 }
2817 
2818 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2819 {
2820     FloatParts64 p;
2821     const FloatFmt *fmt;
2822 
2823     float32_unpack_canonical(&p, a, s);
2824     if (ieee) {
2825         parts_float_to_float(&p, s);
2826         fmt = &float16_params;
2827     } else {
2828         parts_float_to_ahp(&p, s);
2829         fmt = &float16_params_ahp;
2830     }
2831     return float16a_round_pack_canonical(&p, s, fmt);
2832 }
2833 
2834 static float64 QEMU_SOFTFLOAT_ATTR
2835 soft_float32_to_float64(float32 a, float_status *s)
2836 {
2837     FloatParts64 p;
2838 
2839     float32_unpack_canonical(&p, a, s);
2840     parts_float_to_float(&p, s);
2841     return float64_round_pack_canonical(&p, s);
2842 }
2843 
2844 float64 float32_to_float64(float32 a, float_status *s)
2845 {
2846     if (likely(float32_is_normal(a))) {
2847         /* Widening conversion can never produce inexact results.  */
2848         union_float32 uf;
2849         union_float64 ud;
2850         uf.s = a;
2851         ud.h = uf.h;
2852         return ud.s;
2853     } else if (float32_is_zero(a)) {
2854         return float64_set_sign(float64_zero, float32_is_neg(a));
2855     } else {
2856         return soft_float32_to_float64(a, s);
2857     }
2858 }
2859 
2860 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2861 {
2862     FloatParts64 p;
2863     const FloatFmt *fmt;
2864 
2865     float64_unpack_canonical(&p, a, s);
2866     if (ieee) {
2867         parts_float_to_float(&p, s);
2868         fmt = &float16_params;
2869     } else {
2870         parts_float_to_ahp(&p, s);
2871         fmt = &float16_params_ahp;
2872     }
2873     return float16a_round_pack_canonical(&p, s, fmt);
2874 }
2875 
/*
 * Conversions among float32, float64 and bfloat16: all use the same
 * unpack / parts_float_to_float / round-pack pattern via FloatParts64.
 */
float32 float64_to_float32(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float32 bfloat16_to_float32(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float64 bfloat16_to_float64(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 float32_to_bfloat16(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}

bfloat16 float64_to_bfloat16(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}
2920 
/*
 * Conversions to/from float128 change the width of the parts fraction,
 * so they use the narrowing/widening helpers rather than
 * parts_float_to_float.
 */
float32 float128_to_float32(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float32_round_pack_canonical(&p64, s);
}

float64 float128_to_float64(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float64_round_pack_canonical(&p64, s);
}

float128 float32_to_float128(float32 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float32_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}

float128 float64_to_float128(float64 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float64_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}
2960 
/*
 * Conversions to/from floatx80.  floatx80_unpack_canonical can fail
 * (it returns false for invalid 80-bit encodings); in that case the
 * result is the default NaN.
 */
float32 floatx80_to_float32(floatx80 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    if (floatx80_unpack_canonical(&p128, a, s)) {
        parts_float_to_float_narrow(&p64, &p128, s);
    } else {
        parts_default_nan(&p64, s);
    }
    return float32_round_pack_canonical(&p64, s);
}

float64 floatx80_to_float64(floatx80 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    if (floatx80_unpack_canonical(&p128, a, s)) {
        parts_float_to_float_narrow(&p64, &p128, s);
    } else {
        parts_default_nan(&p64, s);
    }
    return float64_round_pack_canonical(&p64, s);
}

float128 floatx80_to_float128(floatx80 a, float_status *s)
{
    FloatParts128 p;

    /* Both sides use 128-bit parts, so no narrowing is needed. */
    if (floatx80_unpack_canonical(&p, a, s)) {
        parts_float_to_float(&p, s);
    } else {
        parts_default_nan(&p, s);
    }
    return float128_round_pack_canonical(&p, s);
}

floatx80 float32_to_floatx80(float32 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float32_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return floatx80_round_pack_canonical(&p128, s);
}

floatx80 float64_to_floatx80(float64 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float64_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return floatx80_round_pack_canonical(&p128, s);
}

floatx80 float128_to_floatx80(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return floatx80_round_pack_canonical(&p, s);
}
3027 
3028 /*
3029  * Round to integral value
3030  */
3031 
/*
 * Round each input to an integral value in the same floating-point
 * format, using the rounding mode currently set in float_status.
 */
float16 float16_round_to_int(float16 a, float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
    return float16_round_pack_canonical(&p, s);
}

float32 float32_round_to_int(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
    return float32_round_pack_canonical(&p, s);
}

float64 float64_round_to_int(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, s);
}

float128 float128_round_to_int(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
    return float128_round_pack_canonical(&p, s);
}

floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
{
    FloatParts128 p;

    /* Invalid 80-bit encodings convert to the default NaN. */
    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }

    /* The format parameters depend on the current x80 rounding precision. */
    parts_round_to_int(&p, status->float_rounding_mode, 0, status,
                       &floatx80_params[status->floatx80_rounding_precision]);
    return floatx80_round_pack_canonical(&p, status);
}
3089 
3090 /*
3091  * Floating-point to signed integer conversions
3092  */
3093 
/*
 * Convert float16 to a signed integer: multiply by 2**scale, round
 * with rmode, and bound the result to the given [min, max] range.
 */
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                              float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
3129 
/*
 * float32/float64 to signed integer, scaled by 2**scale and bounded to
 * the destination type's range via parts_float_to_sint.
 */
int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
3183 
/*
 * bfloat16 to signed integer, scaled by 2**scale and bounded to the
 * destination type's range via parts_float_to_sint.
 */
int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                               float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
3219 
/*
 * float128 to int32/int64, scaled by 2**scale and bounded to the
 * destination type's range via parts_float_to_sint.
 */
static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
3237 
3238 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3239                                         int scale, float_status *s)
3240 {
3241     int flags = 0;
3242     Int128 r;
3243     FloatParts128 p;
3244 
3245     float128_unpack_canonical(&p, a, s);
3246 
3247     switch (p.cls) {
3248     case float_class_snan:
3249         flags |= float_flag_invalid_snan;
3250         /* fall through */
3251     case float_class_qnan:
3252         flags |= float_flag_invalid;
3253         r = UINT128_MAX;
3254         break;
3255 
3256     case float_class_inf:
3257         flags = float_flag_invalid | float_flag_invalid_cvti;
3258         r = p.sign ? INT128_MIN : INT128_MAX;
3259         break;
3260 
3261     case float_class_zero:
3262         return int128_zero();
3263 
3264     case float_class_normal:
3265     case float_class_denormal:
3266         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3267             flags = float_flag_inexact;
3268         }
3269 
3270         if (p.exp < 127) {
3271             int shift = 127 - p.exp;
3272             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3273             if (p.sign) {
3274                 r = int128_neg(r);
3275             }
3276         } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3277                    p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3278             r = INT128_MIN;
3279         } else {
3280             flags = float_flag_invalid | float_flag_invalid_cvti;
3281             r = p.sign ? INT128_MIN : INT128_MAX;
3282         }
3283         break;
3284 
3285     default:
3286         g_assert_not_reached();
3287     }
3288 
3289     float_raise(flags, s);
3290     return r;
3291 }
3292 
/*
 * floatx80 to int32/int64.  Invalid 80-bit encodings are replaced by
 * the default NaN before the integer conversion.
 */
static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, s)) {
        parts_default_nan(&p, s);
    }
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, s)) {
        parts_default_nan(&p, s);
    }
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
3314 
/*
 * Signed integer conversions using the rounding mode currently set in
 * float_status; thin wrappers around the *_scalbn variants with scale 0.
 */
int8_t float16_to_int8(float16 a, float_status *s)
{
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float128_to_int32(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float128_to_int64(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

Int128 float128_to_int128(float128 a, float_status *s)
{
    return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t floatx80_to_int32(floatx80 a, float_status *s)
{
    return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t floatx80_to_int64(floatx80 a, float_status *s)
{
    return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}
3389 
/*
 * Signed integer conversions that always truncate toward zero,
 * regardless of the rounding mode in float_status.
 */
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
}

int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
{
    return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
{
    return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
3459 
/*
 * bfloat16 signed integer conversions: current-rounding-mode variants
 * followed by the truncating (round-to-zero) variants.
 */
int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
{
    return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
}

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
3499 
/*
 * Modular conversions: parts_float_to_sint_modulo keeps the low bits of
 * the result (31 or 63 value bits) instead of saturating on overflow.
 */
int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint_modulo(&p, rmode, 31, s);
}

int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint_modulo(&p, rmode, 63, s);
}
3517 
3518 /*
3519  * Floating-point to unsigned integer conversions
3520  */
3521 
/*
 * float16 to unsigned integer: multiply by 2**scale, round with rmode,
 * and bound the result to [0, max] via parts_float_to_uint.
 */
uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}

uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
3557 
/*
 * float32/float64 to unsigned integer, scaled by 2**scale and bounded
 * to the destination type's range via parts_float_to_uint.
 */
uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
3611 
/*
 * bfloat16 to unsigned integer, scaled by 2**scale and bounded to the
 * destination type's range via parts_float_to_uint.
 */
uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
                                 int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}

uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
3647 
/*
 * float128 to uint32/uint64, scaled by 2**scale and bounded to the
 * destination type's range via parts_float_to_uint.
 */
static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
3665 
/*
 * Convert float128 to an unsigned 128-bit integer, scaled by 2**scale
 * and rounded with rmode.  Hand-rolled because parts_float_to_uint only
 * handles results up to 64 bits.  NaNs, infinities, negative values and
 * out-of-range magnitudes raise invalid and return a sentinel.
 */
static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
                                         int scale, float_status *s)
{
    int flags = 0;
    Int128 r;
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);

    switch (p.cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = UINT128_MAX;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p.sign ? int128_zero() : UINT128_MAX;
        break;

    case float_class_zero:
        return int128_zero();

    case float_class_normal:
    case float_class_denormal:
        if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
            flags = float_flag_inexact;
            /* Rounding may have flushed a small magnitude to zero. */
            if (p.cls == float_class_zero) {
                r = int128_zero();
                break;
            }
        }

        if (p.sign) {
            /* Negative values cannot be represented unsigned. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = int128_zero();
        } else if (p.exp <= 127) {
            /* exp == 127 fits here: unsigned has no sign bit to spend. */
            int shift = 127 - p.exp;
            r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
        } else {
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = UINT128_MAX;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}
3721 
/*
 * Unsigned integer conversions using the rounding mode currently set in
 * float_status; thin wrappers around the *_scalbn variants with scale 0.
 */
uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float128_to_uint32(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float128_to_uint64(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

Int128 float128_to_uint128(float128 a, float_status *s)
{
    return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
}
3786 
/*
 * Truncating conversions: as above, but force round-toward-zero
 * regardless of the rounding mode configured in *s.  No scaling.
 */
uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
}
3846 
/*
 * bfloat16 -> unsigned integer wrappers, using the rounding mode
 * from *s.  No scaling.
 */
uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

/* Truncating (round-toward-zero) variants. */
uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
3886 
3887 /*
3888  * Signed integer to floating-point conversions
3889  */
3890 
3891 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3892 {
3893     FloatParts64 p;
3894 
3895     parts_sint_to_float(&p, a, scale, status);
3896     return float16_round_pack_canonical(&p, status);
3897 }
3898 
/*
 * Narrower signed types widen losslessly to int64_t, so all of these
 * funnel into int64_to_float16_scalbn.
 */
float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
3928 
3929 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3930 {
3931     FloatParts64 p;
3932 
3933     /* Without scaling, there are no overflow concerns. */
3934     if (likely(scale == 0) && can_use_fpu(status)) {
3935         union_float32 ur;
3936         ur.h = a;
3937         return ur.s;
3938     }
3939 
3940     parts64_sint_to_float(&p, a, scale, status);
3941     return float32_round_pack_canonical(&p, status);
3942 }
3943 
/*
 * Narrower signed types widen losslessly to int64_t, so all of these
 * funnel into int64_to_float32_scalbn.
 */
float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}
3968 
3969 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3970 {
3971     FloatParts64 p;
3972 
3973     /* Without scaling, there are no overflow concerns. */
3974     if (likely(scale == 0) && can_use_fpu(status)) {
3975         union_float64 ur;
3976         ur.h = a;
3977         return ur.s;
3978     }
3979 
3980     parts_sint_to_float(&p, a, scale, status);
3981     return float64_round_pack_canonical(&p, status);
3982 }
3983 
/*
 * Narrower signed types widen losslessly to int64_t, so all of these
 * funnel into int64_to_float64_scalbn.
 */
float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
4008 
4009 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
4010 {
4011     FloatParts64 p;
4012 
4013     parts_sint_to_float(&p, a, scale, status);
4014     return bfloat16_round_pack_canonical(&p, status);
4015 }
4016 
/*
 * Narrower signed types widen losslessly to int64_t, so all of these
 * funnel into int64_to_bfloat16_scalbn.
 */
bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}
4051 
/*
 * Convert the signed 128-bit integer @a to float128.
 *
 * The value is decomposed by hand into sign/exponent/fraction parts
 * and then rounded and packed via float128_round_pack_canonical.
 */
float128 int128_to_float128(Int128 a, float_status *status)
{
    FloatParts128 p = { };
    int shift;

    if (int128_nz(a)) {
        p.cls = float_class_normal;
        if (!int128_nonneg(a)) {
            p.sign = true;
            /*
             * Two's-complement negation; note INT128_MIN maps to
             * itself, which still normalizes to the correct
             * magnitude 2**127 below.
             */
            a = int128_neg(a);
        }

        /* Count leading zeros across the full 128-bit value. */
        shift = clz64(int128_gethi(a));
        if (shift == 64) {
            shift += clz64(int128_getlo(a));
        }

        /* Normalize so the most-significant set bit lands at bit 127. */
        p.exp = 127 - shift;
        a = int128_lshift(a, shift);

        p.frac_hi = int128_gethi(a);
        p.frac_lo = int128_getlo(a);
    } else {
        p.cls = float_class_zero;
    }

    return float128_round_pack_canonical(&p, status);
}
4080 
/*
 * Signed 64/32-bit integer to float128 / floatx80 conversions.
 * The 32-bit variants widen losslessly and defer to the 64-bit ones.
 */
float128 int64_to_float128(int64_t a, float_status *status)
{
    FloatParts128 p;

    parts_sint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}

float128 int32_to_float128(int32_t a, float_status *status)
{
    return int64_to_float128(a, status);
}

floatx80 int64_to_floatx80(int64_t a, float_status *status)
{
    FloatParts128 p;

    parts_sint_to_float(&p, a, 0, status);
    return floatx80_round_pack_canonical(&p, status);
}

floatx80 int32_to_floatx80(int32_t a, float_status *status)
{
    return int64_to_floatx80(a, status);
}
4106 
4107 /*
4108  * Unsigned Integer to floating-point conversions
4109  */
4110 
4111 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
4112 {
4113     FloatParts64 p;
4114 
4115     parts_uint_to_float(&p, a, scale, status);
4116     return float16_round_pack_canonical(&p, status);
4117 }
4118 
/*
 * Narrower unsigned types widen losslessly to uint64_t, so all of
 * these funnel into uint64_to_float16_scalbn.
 */
float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
4148 
4149 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4150 {
4151     FloatParts64 p;
4152 
4153     /* Without scaling, there are no overflow concerns. */
4154     if (likely(scale == 0) && can_use_fpu(status)) {
4155         union_float32 ur;
4156         ur.h = a;
4157         return ur.s;
4158     }
4159 
4160     parts_uint_to_float(&p, a, scale, status);
4161     return float32_round_pack_canonical(&p, status);
4162 }
4163 
/*
 * Narrower unsigned types widen losslessly to uint64_t, so all of
 * these funnel into uint64_to_float32_scalbn.
 */
float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}
4188 
4189 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4190 {
4191     FloatParts64 p;
4192 
4193     /* Without scaling, there are no overflow concerns. */
4194     if (likely(scale == 0) && can_use_fpu(status)) {
4195         union_float64 ur;
4196         ur.h = a;
4197         return ur.s;
4198     }
4199 
4200     parts_uint_to_float(&p, a, scale, status);
4201     return float64_round_pack_canonical(&p, status);
4202 }
4203 
/*
 * Narrower unsigned types widen losslessly to uint64_t, so all of
 * these funnel into uint64_to_float64_scalbn.
 */
float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}
4228 
4229 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4230 {
4231     FloatParts64 p;
4232 
4233     parts_uint_to_float(&p, a, scale, status);
4234     return bfloat16_round_pack_canonical(&p, status);
4235 }
4236 
/*
 * Narrower unsigned types widen losslessly to uint64_t, so all of
 * these funnel into uint64_to_bfloat16_scalbn.
 */
bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}
4271 
4272 float128 uint64_to_float128(uint64_t a, float_status *status)
4273 {
4274     FloatParts128 p;
4275 
4276     parts_uint_to_float(&p, a, 0, status);
4277     return float128_round_pack_canonical(&p, status);
4278 }
4279 
/*
 * Convert the unsigned 128-bit integer @a (carried in an Int128) to
 * float128.  Like int128_to_float128, but with no sign handling.
 */
float128 uint128_to_float128(Int128 a, float_status *status)
{
    FloatParts128 p = { };
    int shift;

    if (int128_nz(a)) {
        p.cls = float_class_normal;

        /* Count leading zeros across the full 128-bit value. */
        shift = clz64(int128_gethi(a));
        if (shift == 64) {
            shift += clz64(int128_getlo(a));
        }

        /* Normalize so the most-significant set bit lands at bit 127. */
        p.exp = 127 - shift;
        a = int128_lshift(a, shift);

        p.frac_hi = int128_gethi(a);
        p.frac_lo = int128_getlo(a);
    } else {
        p.cls = float_class_zero;
    }

    return float128_round_pack_canonical(&p, status);
}
4304 
4305 /*
4306  * Minimum and maximum
4307  */
4308 
4309 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4310 {
4311     FloatParts64 pa, pb, *pr;
4312 
4313     float16_unpack_canonical(&pa, a, s);
4314     float16_unpack_canonical(&pb, b, s);
4315     pr = parts_minmax(&pa, &pb, s, flags);
4316 
4317     return float16_round_pack_canonical(pr, s);
4318 }
4319 
4320 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4321                                 float_status *s, int flags)
4322 {
4323     FloatParts64 pa, pb, *pr;
4324 
4325     bfloat16_unpack_canonical(&pa, a, s);
4326     bfloat16_unpack_canonical(&pb, b, s);
4327     pr = parts_minmax(&pa, &pb, s, flags);
4328 
4329     return bfloat16_round_pack_canonical(pr, s);
4330 }
4331 
4332 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4333 {
4334     FloatParts64 pa, pb, *pr;
4335 
4336     float32_unpack_canonical(&pa, a, s);
4337     float32_unpack_canonical(&pb, b, s);
4338     pr = parts_minmax(&pa, &pb, s, flags);
4339 
4340     return float32_round_pack_canonical(pr, s);
4341 }
4342 
4343 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4344 {
4345     FloatParts64 pa, pb, *pr;
4346 
4347     float64_unpack_canonical(&pa, a, s);
4348     float64_unpack_canonical(&pb, b, s);
4349     pr = parts_minmax(&pa, &pb, s, flags);
4350 
4351     return float64_round_pack_canonical(pr, s);
4352 }
4353 
4354 static float128 float128_minmax(float128 a, float128 b,
4355                                 float_status *s, int flags)
4356 {
4357     FloatParts128 pa, pb, *pr;
4358 
4359     float128_unpack_canonical(&pa, a, s);
4360     float128_unpack_canonical(&pb, b, s);
4361     pr = parts_minmax(&pa, &pb, s, flags);
4362 
4363     return float128_round_pack_canonical(pr, s);
4364 }
4365 
/*
 * MINMAX_1 expands to one public entry point, e.g. float32_max(),
 * forwarding to the per-type *_minmax helper with the given flags.
 */
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

/*
 * MINMAX_2 instantiates the full family of eight variants for one
 * float type, each distinguished only by its minmax_* flag set.
 */
#define MINMAX_2(type) \
    MINMAX_1(type, max, 0)                                                \
    MINMAX_1(type, maxnum, minmax_isnum)                                  \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
    MINMAX_1(type, maximum_number, minmax_isnumber)                       \
    MINMAX_1(type, min, minmax_ismin)                                     \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
    MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \

MINMAX_2(float16)
MINMAX_2(bfloat16)
MINMAX_2(float32)
MINMAX_2(float64)
MINMAX_2(float128)

#undef MINMAX_1
#undef MINMAX_2
4388 
4389 /*
4390  * Floating point compare
4391  */
4392 
/*
 * float16 comparison: unpack both operands and defer to
 * parts_compare.  @is_quiet selects whether only signaling NaNs
 * (quiet) or any NaN (signaling compare) raises invalid.
 */
static FloatRelation QEMU_FLATTEN
float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, false);
}

FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, true);
}

/* Softfloat slow path for float32 comparison; see float32_hs_compare. */
static FloatRelation QEMU_SOFTFLOAT_ATTR
float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}
4422 
/*
 * Hard-float fast path for float32 comparison.  Uses the host's
 * type-generic comparison macros (which do not trap on quiet NaNs)
 * for the ordered cases, and falls back to the softfloat path for
 * unordered operands or whenever flag bookkeeping is needed.
 */
static FloatRelation QEMU_FLATTEN
float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    if (unlikely(float32_is_denormal(ua.s) || float32_is_denormal(ub.s))) {
        /* We may need to set the input_denormal_used flag */
        goto soft;
    }

    /* Ordered cases: >=, then distinguish > from ==. */
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float32_do_compare(ua.s, ub.s, s, is_quiet);
}
4456 
/* Public float32 comparisons: signaling and quiet variants. */
FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, false);
}

FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, true);
}

/* Softfloat slow path for float64 comparison; see float64_hs_compare. */
static FloatRelation QEMU_SOFTFLOAT_ATTR
float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}
4476 
/*
 * Hard-float fast path for float64 comparison; mirrors
 * float32_hs_compare above.
 */
static FloatRelation QEMU_FLATTEN
float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    if (unlikely(float64_is_denormal(ua.s) || float64_is_denormal(ub.s))) {
        /* We may need to set the input_denormal_used flag */
        goto soft;
    }

    /* Ordered cases: >=, then distinguish > from ==. */
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float64_do_compare(ua.s, ub.s, s, is_quiet);
}
4510 
/* Public float64 comparisons: signaling and quiet variants. */
FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, false);
}

FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, true);
}

/* bfloat16 comparison: softfloat only (no hardfloat fast path). */
static FloatRelation QEMU_FLATTEN
bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, false);
}

FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, true);
}

/* float128 comparison: softfloat only. */
static FloatRelation QEMU_FLATTEN
float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
{
    FloatParts128 pa, pb;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation float128_compare(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, false);
}

FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, true);
}

/*
 * floatx80 comparison: operands whose encoding fails to unpack
 * (floatx80_unpack_canonical returns false) compare unordered.
 */
static FloatRelation QEMU_FLATTEN
floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
{
    FloatParts128 pa, pb;

    if (!floatx80_unpack_canonical(&pa, a, s) ||
        !floatx80_unpack_canonical(&pb, b, s)) {
        return float_relation_unordered;
    }
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
{
    return floatx80_do_compare(a, b, s, false);
}

FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
{
    return floatx80_do_compare(a, b, s, true);
}
4582 
4583 /*
4584  * Scale by 2**N
4585  */
4586 
/*
 * Scale @a by 2**@n: unpack, adjust the exponent via parts_scalbn,
 * and repack with rounding/flag handling from @status.
 */
float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float16_round_pack_canonical(&p, status);
}

float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float32_round_pack_canonical(&p, status);
}

float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float64_round_pack_canonical(&p, status);
}

bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 float128_scalbn(float128 a, int n, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float128_round_pack_canonical(&p, status);
}

floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
{
    FloatParts128 p;

    /* An invalid floatx80 encoding fails to unpack: return default NaN. */
    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }
    parts_scalbn(&p, n, status);
    return floatx80_round_pack_canonical(&p, status);
}
4642 
4643 /*
4644  * Square Root
4645  */
4646 
/* Softfloat square root: unpack, compute via parts_sqrt, repack. */
float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float16_params);
    return float16_round_pack_canonical(&p, status);
}

/* Slow path for float32_sqrt below. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float32_params);
    return float32_round_pack_canonical(&p, status);
}

/* Slow path for float64_sqrt below. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float64_params);
    return float64_round_pack_canonical(&p, status);
}
4675 
/*
 * float32 square root with hardfloat fast path: use the host sqrtf
 * only for non-negative zero or normal inputs (no NaN, infinity,
 * denormal, or negative operand, which all need softfloat flag
 * handling); otherwise fall back to soft_f32_sqrt.
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Apply flush-to-zero on the input before classifying it. */
    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        /* Classify with host FP macros when that is the faster option. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    /* Note: ua.s may already have been input-flushed above. */
    return soft_f32_sqrt(ua.s, s);
}
4702 
/*
 * float64 square root with hardfloat fast path; mirrors
 * float32_sqrt above.
 */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Apply flush-to-zero on the input before classifying it. */
    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        /* Classify with host FP macros when that is the faster option. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    /* Note: ua.s may already have been input-flushed above. */
    return soft_f64_sqrt(ua.s, s);
}
4729 
/*
 * float64 operand computed with float64 precision but packed with
 * float32-restricted range/precision (float64r32).
 */
float64 float64r32_sqrt(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float64_params);
    return float64r32_round_pack_canonical(&p, status);
}

bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float128_params);
    return float128_round_pack_canonical(&p, status);
}

floatx80 floatx80_sqrt(floatx80 a, float_status *s)
{
    FloatParts128 p;

    /* An invalid floatx80 encoding fails to unpack: return default NaN. */
    if (!floatx80_unpack_canonical(&p, a, s)) {
        return floatx80_default_nan(s);
    }
    /* Precision of the result follows the configured x80 rounding mode. */
    parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
    return floatx80_round_pack_canonical(&p, s);
}
4767 
4768 /*
4769  * log2
4770  */
/* Base-2 logarithm: unpack, compute via parts_log2, repack. */
float32 float32_log2(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_log2(&p, status, &float32_params);
    return float32_round_pack_canonical(&p, status);
}

float64 float64_log2(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_log2(&p, status, &float64_params);
    return float64_round_pack_canonical(&p, status);
}
4788 
4789 /*----------------------------------------------------------------------------
4790 | The pattern for a default generated NaN.
4791 *----------------------------------------------------------------------------*/
4792 
/*
 * Build the target-configured default NaN pattern for each format:
 * parts_default_nan fills the canonical parts, then the fraction is
 * shifted from its canonical position into the format's storage
 * position before packing raw (no rounding involved).
 */
float16 float16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

float128 float128_default_nan(float_status *status)
{
    FloatParts128 p;

    parts_default_nan(&p, status);
    /* 128-bit fraction needs the two-word shift helper. */
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}

bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
4837 
4838 /*----------------------------------------------------------------------------
4839 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4840 *----------------------------------------------------------------------------*/
4841 
4842 float16 float16_silence_nan(float16 a, float_status *status)
4843 {
4844     FloatParts64 p;
4845 
4846     float16_unpack_raw(&p, a);
4847     p.frac <<= float16_params.frac_shift;
4848     parts_silence_nan(&p, status);
4849     p.frac >>= float16_params.frac_shift;
4850     return float16_pack_raw(&p);
4851 }
4852 
4853 float32 float32_silence_nan(float32 a, float_status *status)
4854 {
4855     FloatParts64 p;
4856 
4857     float32_unpack_raw(&p, a);
4858     p.frac <<= float32_params.frac_shift;
4859     parts_silence_nan(&p, status);
4860     p.frac >>= float32_params.frac_shift;
4861     return float32_pack_raw(&p);
4862 }
4863 
4864 float64 float64_silence_nan(float64 a, float_status *status)
4865 {
4866     FloatParts64 p;
4867 
4868     float64_unpack_raw(&p, a);
4869     p.frac <<= float64_params.frac_shift;
4870     parts_silence_nan(&p, status);
4871     p.frac >>= float64_params.frac_shift;
4872     return float64_pack_raw(&p);
4873 }
4874 
4875 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4876 {
4877     FloatParts64 p;
4878 
4879     bfloat16_unpack_raw(&p, a);
4880     p.frac <<= bfloat16_params.frac_shift;
4881     parts_silence_nan(&p, status);
4882     p.frac >>= bfloat16_params.frac_shift;
4883     return bfloat16_pack_raw(&p);
4884 }
4885 
4886 float128 float128_silence_nan(float128 a, float_status *status)
4887 {
4888     FloatParts128 p;
4889 
4890     float128_unpack_raw(&p, a);
4891     frac_shl(&p, float128_params.frac_shift);
4892     parts_silence_nan(&p, status);
4893     frac_shr(&p, float128_params.frac_shift);
4894     return float128_pack_raw(&p);
4895 }
4896 
4897 /*----------------------------------------------------------------------------
4898 | If `a' is denormal and we are in flush-to-zero mode then set the
4899 | input-denormal exception and return zero. Otherwise just return the value.
4900 *----------------------------------------------------------------------------*/
4901 
4902 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4903 {
4904     if (p.exp == 0 && p.frac != 0) {
4905         float_raise(float_flag_input_denormal_flushed, status);
4906         return true;
4907     }
4908 
4909     return false;
4910 }
4911 
4912 float16 float16_squash_input_denormal(float16 a, float_status *status)
4913 {
4914     if (status->flush_inputs_to_zero) {
4915         FloatParts64 p;
4916 
4917         float16_unpack_raw(&p, a);
4918         if (parts_squash_denormal(p, status)) {
4919             return float16_set_sign(float16_zero, p.sign);
4920         }
4921     }
4922     return a;
4923 }
4924 
4925 float32 float32_squash_input_denormal(float32 a, float_status *status)
4926 {
4927     if (status->flush_inputs_to_zero) {
4928         FloatParts64 p;
4929 
4930         float32_unpack_raw(&p, a);
4931         if (parts_squash_denormal(p, status)) {
4932             return float32_set_sign(float32_zero, p.sign);
4933         }
4934     }
4935     return a;
4936 }
4937 
4938 float64 float64_squash_input_denormal(float64 a, float_status *status)
4939 {
4940     if (status->flush_inputs_to_zero) {
4941         FloatParts64 p;
4942 
4943         float64_unpack_raw(&p, a);
4944         if (parts_squash_denormal(p, status)) {
4945             return float64_set_sign(float64_zero, p.sign);
4946         }
4947     }
4948     return a;
4949 }
4950 
4951 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4952 {
4953     if (status->flush_inputs_to_zero) {
4954         FloatParts64 p;
4955 
4956         bfloat16_unpack_raw(&p, a);
4957         if (parts_squash_denormal(p, status)) {
4958             return bfloat16_set_sign(bfloat16_zero, p.sign);
4959         }
4960     }
4961     return a;
4962 }
4963 
4964 /*----------------------------------------------------------------------------
4965 | Normalizes the subnormal extended double-precision floating-point value
4966 | represented by the denormalized significand `aSig'.  The normalized exponent
4967 | and significand are stored at the locations pointed to by `zExpPtr' and
4968 | `zSigPtr', respectively.
4969 *----------------------------------------------------------------------------*/
4970 
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* Shift the most-significant set bit of the significand up to bit 63
     * and adjust the exponent by the same amount. */
    int8_t shift = clz64(aSig);

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4980 
4981 /*----------------------------------------------------------------------------
4982 | Takes two extended double-precision floating-point values `a' and `b', one
4983 | of which is a NaN, and returns the appropriate NaN result.  If either `a' or
4984 | `b' is a signaling NaN, the invalid exception is raised.
4985 *----------------------------------------------------------------------------*/
4986 
4987 floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status)
4988 {
4989     FloatParts128 pa, pb, *pr;
4990 
4991     if (!floatx80_unpack_canonical(&pa, a, status) ||
4992         !floatx80_unpack_canonical(&pb, b, status)) {
4993         return floatx80_default_nan(status);
4994     }
4995 
4996     pr = parts_pick_nan(&pa, &pb, status);
4997     return floatx80_round_pack_canonical(pr, status);
4998 }
4999 
5000 /*----------------------------------------------------------------------------
5001 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
5002 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
5003 | and returns the proper extended double-precision floating-point value
5004 | corresponding to the abstract input.  Ordinarily, the abstract value is
5005 | rounded and packed into the extended double-precision format, with the
5006 | inexact exception raised if the abstract input cannot be represented
5007 | exactly.  However, if the abstract value is too large, the overflow and
5008 | inexact exceptions are raised and an infinity or maximal finite value is
5009 | returned.  If the abstract value is too small, the input value is rounded to
5010 | a subnormal number, and the underflow and inexact exceptions are raised if
5011 | the abstract input cannot be represented exactly as a subnormal extended
5012 | double-precision floating-point number.
5013 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
5014 | the result is rounded to the same number of bits as single or double
5015 | precision, respectively.  Otherwise, the result is rounded to the full
5016 | precision of the extended double-precision format.
5017 |     The input significand must be normalized or smaller.  If the input
5018 | significand is not normalized, `zExp' must be 0; in that case, the result
5019 | returned is a subnormal number, and it must not require rounding.  The
5020 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
5021 | Floating-Point Arithmetic.
5022 *----------------------------------------------------------------------------*/
5023 
floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    FloatRoundMode roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * For reduced-precision rounding (single/double), rounding happens in
     * zSig0 alone: roundIncrement is half an ULP of the reduced precision
     * and roundMask covers the bits below that ULP.  Full 80-bit precision
     * rounds on zSig1 instead, in the precision80 section below.
     */
    switch (roundingPrecision) {
    case floatx80_precision_x:
        goto precision80;
    case floatx80_precision_d:
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
        break;
    case floatx80_precision_s:
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
        break;
    default:
        g_assert_not_reached();
    }
    /* Fold zSig1 into zSig0's low bit as a sticky bit. */
    zSig0 |= ( zSig1 != 0 );
    /* Directed rounding modes replace the half-ULP increment. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    /* Single unsigned compare catches both zExp <= 0 and zExp >= 0x7FFE. */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        /* Underflow: denormalize and round in the subnormal range. */
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal_flushed, status);
                return packFloatx80(zSign, 0, 0);
            }
            /* Tininess may be detected before or after rounding. */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* Rounding may carry into the integer bit: back to normal. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            /* Ties-to-even: also clear the ULP bit on an exact tie. */
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    /* Carry out of the significand: renormalize to 1.0 * 2^(zExp+1). */
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    /* Ties-to-even: also clear the ULP bit on an exact tie. */
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /* Full 80-bit precision: zSig1 holds the round and sticky bits. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            /* roundMask = 0 makes ~roundMask the maximal significand;
             * the overflow label is shared with the reduced-precision
             * paths above, which enter with their own roundMask. */
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Modes that round toward zero return the largest finite
             * value instead of infinity. */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return floatx80_default_inf(zSign, status);
        }
        /* Underflow: denormalize, then re-derive the round decision. */
        if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* zSig1 changed, so the increment must be recomputed. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /* Exact tie under nearest-even: force the low bit even. */
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                /* Carry into the integer bit: result is normal again. */
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        /* Carry out of the significand: renormalize upward. */
        if ( zSig0 == 0 ) {
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            /* Exact tie under nearest-even: force the low bit even. */
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
5211 
5212 /*----------------------------------------------------------------------------
5213 | Takes an abstract floating-point value having sign `zSign', exponent
5214 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5215 | and returns the proper extended double-precision floating-point value
5216 | corresponding to the abstract input.  This routine is just like
5217 | `roundAndPackFloatx80' except that the input significand does not have to be
5218 | normalized.
5219 *----------------------------------------------------------------------------*/
5220 
5221 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5222                                        bool zSign, int32_t zExp,
5223                                        uint64_t zSig0, uint64_t zSig1,
5224                                        float_status *status)
5225 {
5226     int8_t shiftCount;
5227 
5228     if ( zSig0 == 0 ) {
5229         zSig0 = zSig1;
5230         zSig1 = 0;
5231         zExp -= 64;
5232     }
5233     shiftCount = clz64(zSig0);
5234     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5235     zExp -= shiftCount;
5236     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5237                                 zSig0, zSig1, status);
5238 
5239 }
5240 
5241 /*----------------------------------------------------------------------------
5242 | Returns the binary exponential of the single-precision floating-point value
5243 | `a'. The operation is performed according to the IEC/IEEE Standard for
5244 | Binary Floating-Point Arithmetic.
5245 |
5246 | Uses the following identities:
5247 |
5248 | 1. -------------------------------------------------------------------------
5249 |      x    x*ln(2)
5250 |     2  = e
5251 |
5252 | 2. -------------------------------------------------------------------------
5253 |                      2     3     4     5           n
5254 |      x        x     x     x     x     x           x
5255 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5256 |               1!    2!    3!    4!    5!          n!
5257 *----------------------------------------------------------------------------*/
5258 
/* Double-precision encodings of the Taylor-series coefficients 1/n!
 * (n = 1..15) used by float32_exp2 below; the bracketed number is n. */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
5277 
float32 float32_exp2(float32 a, float_status *status)
{
    FloatParts64 xp, xnp, tp, rp;
    int i;

    float32_unpack_canonical(&xp, a, status);
    /* Dispose of the special cases before the series evaluation. */
    if (unlikely(xp.cls != float_class_normal)) {
        switch (xp.cls) {
        case float_class_denormal:
            /* Denormals go through the normal computation below. */
            break;
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(&xp, status);
            return float32_round_pack_canonical(&xp, status);
        case float_class_inf:
            /* 2^-inf = 0, 2^+inf = +inf. */
            return xp.sign ? float32_zero : a;
        case float_class_zero:
            /* 2^0 = 1 exactly. */
            return float32_one;
        default:
            g_assert_not_reached();
        }
    }

    /* The series result is never exact for a finite nonzero input. */
    float_raise(float_flag_inexact, status);

    /* Identity: 2^a = e^(a*ln2); compute x = a*ln2 in double precision. */
    float64_unpack_canonical(&tp, float64_ln2, status);
    xp = *parts_mul(&xp, &tp, status);
    xnp = xp;

    /* Evaluate e^x = sum x^n/n! with rp as the accumulator and xnp
     * tracking the running power x^(i+1). */
    float64_unpack_canonical(&rp, float64_one, status);
    for (i = 0 ; i < 15 ; i++) {

        float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
        rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status);
        xnp = *parts_mul(&xnp, &xp, status);
    }

    return float32_round_pack_canonical(&rp, status);
}
5317 
5318 /*----------------------------------------------------------------------------
5319 | Rounds the extended double-precision floating-point value `a'
5320 | to the precision provided by floatx80_rounding_precision and returns the
5321 | result as an extended double-precision floating-point value.
5322 | The operation is performed according to the IEC/IEEE Standard for Binary
5323 | Floating-Point Arithmetic.
5324 *----------------------------------------------------------------------------*/
5325 
5326 floatx80 floatx80_round(floatx80 a, float_status *status)
5327 {
5328     FloatParts128 p;
5329 
5330     if (!floatx80_unpack_canonical(&p, a, status)) {
5331         return floatx80_default_nan(status);
5332     }
5333     return floatx80_round_pack_canonical(&p, status);
5334 }
5335 
5336 static void __attribute__((constructor)) softfloat_init(void)
5337 {
5338     union_float64 ua, ub, uc, ur;
5339 
5340     if (QEMU_NO_HARDFLOAT) {
5341         return;
5342     }
5343     /*
5344      * Test that the host's FMA is not obviously broken. For example,
5345      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5346      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5347      */
5348     ua.s = 0x0020000000000001ULL;
5349     ub.s = 0x3ca0000000000000ULL;
5350     uc.s = 0x0020000000000000ULL;
5351     ur.h = fma(ua.h, ub.h, uc.h);
5352     if (ur.s != 0x0020000000000001ULL) {
5353         force_soft_fma = true;
5354     }
5355 }
5356