xref: /qemu/fpu/softfloat.c (revision 7ad6a567570ed8cd829c4f05a006e2ba2c86fa4a)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the Softfloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107  * and trapping on every FP exception is not fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that given that exception
120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
130     static inline void name(soft_t *a, float_status *s)                 \
131     {                                                                   \
132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
134                                      soft_t ## _is_neg(*a));            \
135             float_raise(float_flag_input_denormal_flushed, s);          \
136         }                                                               \
137     }
138 
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141 #undef GEN_INPUT_FLUSH__NOCHECK
142 
143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
144     static inline void name(soft_t *a, float_status *s) \
145     {                                                   \
146         if (likely(!s->flush_inputs_to_zero)) {         \
147             return;                                     \
148         }                                               \
149         soft_t ## _input_flush__nocheck(a, s);          \
150     }
151 
152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154 #undef GEN_INPUT_FLUSH1
155 
156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
158     {                                                                   \
159         if (likely(!s->flush_inputs_to_zero)) {                         \
160             return;                                                     \
161         }                                                               \
162         soft_t ## _input_flush__nocheck(a, s);                          \
163         soft_t ## _input_flush__nocheck(b, s);                          \
164     }
165 
166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168 #undef GEN_INPUT_FLUSH2
169 
170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172     {                                                                   \
173         if (likely(!s->flush_inputs_to_zero)) {                         \
174             return;                                                     \
175         }                                                               \
176         soft_t ## _input_flush__nocheck(a, s);                          \
177         soft_t ## _input_flush__nocheck(b, s);                          \
178         soft_t ## _input_flush__nocheck(c, s);                          \
179     }
180 
181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183 #undef GEN_INPUT_FLUSH3
184 
185 /*
186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
187  * hardfloat functions. Each combination of number of inputs and float size
188  * gets its own value.
189  */
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
197 #else
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
204 #endif
205 
206 /*
207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208  * float{32,64}_is_infinity when !USE_FP.
209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211  */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF   1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF   0
216 #endif
217 
218 /*
219  * Some targets clear the FP flags before most FP operations. This prevents
220  * the use of hardfloat, since hardfloat relies on the inexact flag being
221  * already set.
222  */
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226     IEEE implementation
227 # endif
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230 #else
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233 #endif
234 
235 static inline bool can_use_fpu(const float_status *s)
236 {
237     if (QEMU_NO_HARDFLOAT) {
238         return false;
239     }
240     return likely(s->float_exception_flags & float_flag_inexact &&
241                   s->float_rounding_mode == float_round_nearest_even);
242 }
243 
244 /*
245  * Hardfloat generation functions. Each operation can have two flavors:
246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247  * most condition checks, or native ones (e.g. fpclassify).
248  *
249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
250  * compiler to propagate constants and inline everything into the callers.
251  *
252  * We only generate functions for operations with two inputs, since only
253  * these are common enough to justify consolidating them into common code.
254  */
255 
256 typedef union {
257     float32 s;
258     float h;
259 } union_float32;
260 
261 typedef union {
262     float64 s;
263     double h;
264 } union_float64;
265 
266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268 
269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271 typedef float   (*hard_f32_op2_fn)(float a, float b);
272 typedef double  (*hard_f64_op2_fn)(double a, double b);
273 
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
278         /*
279          * Not using a temp variable for consecutive fpclassify calls ends up
280          * generating faster code.
281          */
282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284     }
285     return float32_is_zero_or_normal(a.s) &&
286            float32_is_zero_or_normal(b.s);
287 }
288 
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294     }
295     return float64_is_zero_or_normal(a.s) &&
296            float64_is_zero_or_normal(b.s);
297 }
298 
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307     }
308     return float32_is_zero_or_normal(a.s) &&
309            float32_is_zero_or_normal(b.s) &&
310            float32_is_zero_or_normal(c.s);
311 }
312 
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320     }
321     return float64_is_zero_or_normal(a.s) &&
322            float64_is_zero_or_normal(b.s) &&
323            float64_is_zero_or_normal(c.s);
324 }
325 
326 static inline bool f32_is_inf(union_float32 a)
327 {
328     if (QEMU_HARDFLOAT_USE_ISINF) {
329         return isinf(a.h);
330     }
331     return float32_is_infinity(a.s);
332 }
333 
334 static inline bool f64_is_inf(union_float64 a)
335 {
336     if (QEMU_HARDFLOAT_USE_ISINF) {
337         return isinf(a.h);
338     }
339     return float64_is_infinity(a.s);
340 }
341 
342 static inline float32
343 float32_gen2(float32 xa, float32 xb, float_status *s,
344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
345              f32_check_fn pre, f32_check_fn post)
346 {
347     union_float32 ua, ub, ur;
348 
349     ua.s = xa;
350     ub.s = xb;
351 
352     if (unlikely(!can_use_fpu(s))) {
353         goto soft;
354     }
355 
356     float32_input_flush2(&ua.s, &ub.s, s);
357     if (unlikely(!pre(ua, ub))) {
358         goto soft;
359     }
360 
361     ur.h = hard(ua.h, ub.h);
362     if (unlikely(f32_is_inf(ur))) {
363         float_raise(float_flag_overflow, s);
364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365         goto soft;
366     }
367     return ur.s;
368 
369  soft:
370     return soft(ua.s, ub.s, s);
371 }
372 
373 static inline float64
374 float64_gen2(float64 xa, float64 xb, float_status *s,
375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
376              f64_check_fn pre, f64_check_fn post)
377 {
378     union_float64 ua, ub, ur;
379 
380     ua.s = xa;
381     ub.s = xb;
382 
383     if (unlikely(!can_use_fpu(s))) {
384         goto soft;
385     }
386 
387     float64_input_flush2(&ua.s, &ub.s, s);
388     if (unlikely(!pre(ua, ub))) {
389         goto soft;
390     }
391 
392     ur.h = hard(ua.h, ub.h);
393     if (unlikely(f64_is_inf(ur))) {
394         float_raise(float_flag_overflow, s);
395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396         goto soft;
397     }
398     return ur.s;
399 
400  soft:
401     return soft(ua.s, ub.s, s);
402 }
403 
404 /*
405  * Classify a floating point number. Everything above float_class_qnan
406  * is a NaN so cls >= float_class_qnan is any NaN.
407  *
408  * Note that we canonicalize denormals, so most code should treat
409  * class_normal and class_denormal identically.
410  */
411 
412 typedef enum __attribute__ ((__packed__)) {
413     float_class_unclassified,
414     float_class_zero,
415     float_class_normal,
416     float_class_denormal, /* input was a non-squashed denormal */
417     float_class_inf,
418     float_class_qnan,  /* all NaNs from here */
419     float_class_snan,
420 } FloatClass;
421 
422 #define float_cmask(bit)  (1u << (bit))
423 
424 enum {
425     float_cmask_zero    = float_cmask(float_class_zero),
426     float_cmask_normal  = float_cmask(float_class_normal),
427     float_cmask_denormal = float_cmask(float_class_denormal),
428     float_cmask_inf     = float_cmask(float_class_inf),
429     float_cmask_qnan    = float_cmask(float_class_qnan),
430     float_cmask_snan    = float_cmask(float_class_snan),
431 
432     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
433     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
434     float_cmask_anynorm = float_cmask_normal | float_cmask_denormal,
435 };
436 
437 /* Flags for parts_minmax. */
438 enum {
439     /* Set for minimum; clear for maximum. */
440     minmax_ismin = 1,
441     /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
442     minmax_isnum = 2,
443     /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
444     minmax_ismag = 4,
445     /*
446      * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
447      * operations.
448      */
449     minmax_isnumber = 8,
450 };
451 
452 /* Simple helpers for checking if, or what kind of, NaN we have */
453 static inline __attribute__((unused)) bool is_nan(FloatClass c)
454 {
455     return unlikely(c >= float_class_qnan);
456 }
457 
458 static inline __attribute__((unused)) bool is_snan(FloatClass c)
459 {
460     return c == float_class_snan;
461 }
462 
463 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
464 {
465     return c == float_class_qnan;
466 }
467 
468 /*
469  * Return true if the float_cmask has only normals in it
470  * (including input denormals that were canonicalized)
471  */
472 static inline bool cmask_is_only_normals(int cmask)
473 {
474     return !(cmask & ~float_cmask_anynorm);
475 }
476 
477 static inline bool is_anynorm(FloatClass c)
478 {
479     return float_cmask(c) & float_cmask_anynorm;
480 }
481 
482 /*
483  * Structure holding all of the decomposed parts of a float.
484  * The exponent is unbiased and the fraction is normalized.
485  *
486  * The fraction words are stored in big-endian word ordering,
487  * so that truncation from a larger format to a smaller format
488  * can be done simply by ignoring subsequent elements.
489  */
490 
491 typedef struct {
492     FloatClass cls;
493     bool sign;
494     int32_t exp;
495     union {
496         /* Routines that know the structure may reference the singular name. */
497         uint64_t frac;
498         /*
499          * Routines expanded with multiple structures reference "hi" and "lo"
500          * depending on the operation.  In FloatParts64, "hi" and "lo" are
501          * both the same word and aliased here.
502          */
503         uint64_t frac_hi;
504         uint64_t frac_lo;
505     };
506 } FloatParts64;
507 
508 typedef struct {
509     FloatClass cls;
510     bool sign;
511     int32_t exp;
512     uint64_t frac_hi;
513     uint64_t frac_lo;
514 } FloatParts128;
515 
516 typedef struct {
517     FloatClass cls;
518     bool sign;
519     int32_t exp;
520     uint64_t frac_hi;
521     uint64_t frac_hm;  /* high-middle */
522     uint64_t frac_lm;  /* low-middle */
523     uint64_t frac_lo;
524 } FloatParts256;
525 
526 /* These apply to the most significant word of each FloatPartsN. */
527 #define DECOMPOSED_BINARY_POINT    63
528 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
529 
530 /* Structure holding all of the relevant parameters for a format.
531  *   exp_size: the size of the exponent field
532  *   exp_bias: the offset applied to the exponent field
533  *   exp_max: the maximum normalised exponent
534  *   frac_size: the size of the fraction field
535  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
536  * The following are computed based the size of fraction
537  *   round_mask: bits below lsb which must be rounded
538  * The following optional modifiers are available:
539  *   arm_althp: handle ARM Alternative Half Precision
540  *   m68k_denormal: explicit integer bit for extended precision may be 1
541  */
542 typedef struct {
543     int exp_size;
544     int exp_bias;
545     int exp_re_bias;
546     int exp_max;
547     int frac_size;
548     int frac_shift;
549     bool arm_althp;
550     bool m68k_denormal;
551     uint64_t round_mask;
552 } FloatFmt;
553 
554 /* Expand fields based on the size of exponent and fraction */
555 #define FLOAT_PARAMS_(E)                                \
556     .exp_size       = E,                                \
557     .exp_bias       = ((1 << E) - 1) >> 1,              \
558     .exp_re_bias    = (1 << (E - 1)) + (1 << (E - 2)),  \
559     .exp_max        = (1 << E) - 1
560 
561 #define FLOAT_PARAMS(E, F)                              \
562     FLOAT_PARAMS_(E),                                   \
563     .frac_size      = F,                                \
564     .frac_shift     = (-F - 1) & 63,                    \
565     .round_mask     = (1ull << ((-F - 1) & 63)) - 1
566 
567 static const FloatFmt float16_params = {
568     FLOAT_PARAMS(5, 10)
569 };
570 
571 static const FloatFmt float16_params_ahp = {
572     FLOAT_PARAMS(5, 10),
573     .arm_althp = true
574 };
575 
576 static const FloatFmt bfloat16_params = {
577     FLOAT_PARAMS(8, 7)
578 };
579 
580 static const FloatFmt float32_params = {
581     FLOAT_PARAMS(8, 23)
582 };
583 
584 static const FloatFmt float64_params = {
585     FLOAT_PARAMS(11, 52)
586 };
587 
588 static const FloatFmt float128_params = {
589     FLOAT_PARAMS(15, 112)
590 };
591 
592 #define FLOATX80_PARAMS(R)              \
593     FLOAT_PARAMS_(15),                  \
594     .frac_size = R == 64 ? 63 : R,      \
595     .frac_shift = 0,                    \
596     .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
597 
598 static const FloatFmt floatx80_params[3] = {
599     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
600     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
601     [floatx80_precision_x] = {
602         FLOATX80_PARAMS(64),
603 #ifdef TARGET_M68K
604         .m68k_denormal = true,
605 #endif
606     },
607 };
608 
609 /* Unpack a float to parts, but do not canonicalize.  */
610 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
611 {
612     const int f_size = fmt->frac_size;
613     const int e_size = fmt->exp_size;
614 
615     *r = (FloatParts64) {
616         .cls = float_class_unclassified,
617         .sign = extract64(raw, f_size + e_size, 1),
618         .exp = extract64(raw, f_size, e_size),
619         .frac = extract64(raw, 0, f_size)
620     };
621 }
622 
623 static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
624 {
625     unpack_raw64(p, &float16_params, f);
626 }
627 
628 static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
629 {
630     unpack_raw64(p, &bfloat16_params, f);
631 }
632 
633 static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
634 {
635     unpack_raw64(p, &float32_params, f);
636 }
637 
638 static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
639 {
640     unpack_raw64(p, &float64_params, f);
641 }
642 
643 static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
644 {
645     *p = (FloatParts128) {
646         .cls = float_class_unclassified,
647         .sign = extract32(f.high, 15, 1),
648         .exp = extract32(f.high, 0, 15),
649         .frac_hi = f.low
650     };
651 }
652 
653 static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
654 {
655     const int f_size = float128_params.frac_size - 64;
656     const int e_size = float128_params.exp_size;
657 
658     *p = (FloatParts128) {
659         .cls = float_class_unclassified,
660         .sign = extract64(f.high, f_size + e_size, 1),
661         .exp = extract64(f.high, f_size, e_size),
662         .frac_hi = extract64(f.high, 0, f_size),
663         .frac_lo = f.low,
664     };
665 }
666 
667 /* Pack a float from parts, but do not canonicalize.  */
668 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
669 {
670     const int f_size = fmt->frac_size;
671     const int e_size = fmt->exp_size;
672     uint64_t ret;
673 
674     ret = (uint64_t)p->sign << (f_size + e_size);
675     ret = deposit64(ret, f_size, e_size, p->exp);
676     ret = deposit64(ret, 0, f_size, p->frac);
677     return ret;
678 }
679 
680 static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
681 {
682     return make_float16(pack_raw64(p, &float16_params));
683 }
684 
685 static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
686 {
687     return pack_raw64(p, &bfloat16_params);
688 }
689 
690 static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
691 {
692     return make_float32(pack_raw64(p, &float32_params));
693 }
694 
695 static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
696 {
697     return make_float64(pack_raw64(p, &float64_params));
698 }
699 
700 static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
701 {
702     const int f_size = float128_params.frac_size - 64;
703     const int e_size = float128_params.exp_size;
704     uint64_t hi;
705 
706     hi = (uint64_t)p->sign << (f_size + e_size);
707     hi = deposit64(hi, f_size, e_size, p->exp);
708     hi = deposit64(hi, 0, f_size, p->frac_hi);
709     return make_float128(hi, p->frac_lo);
710 }
711 
712 /*----------------------------------------------------------------------------
713 | Functions and definitions to determine:  (1) whether tininess for underflow
714 | is detected before or after rounding by default, (2) what (if anything)
715 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
716 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
717 | are propagated from function inputs to output.  These details are target-
718 | specific.
719 *----------------------------------------------------------------------------*/
720 #include "softfloat-specialize.c.inc"
721 
722 #define PARTS_GENERIC_64_128(NAME, P) \
723     _Generic((P), FloatParts64 *: parts64_##NAME, \
724                   FloatParts128 *: parts128_##NAME)
725 
726 #define PARTS_GENERIC_64_128_256(NAME, P) \
727     _Generic((P), FloatParts64 *: parts64_##NAME, \
728                   FloatParts128 *: parts128_##NAME, \
729                   FloatParts256 *: parts256_##NAME)
730 
731 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
732 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
733 
734 static void parts64_return_nan(FloatParts64 *a, float_status *s);
735 static void parts128_return_nan(FloatParts128 *a, float_status *s);
736 
737 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
738 
739 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
740                                       float_status *s);
741 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
742                                         float_status *s);
743 
744 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
745 
746 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
747                                              FloatParts64 *c, float_status *s,
748                                              int ab_mask, int abc_mask);
749 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
750                                                FloatParts128 *b,
751                                                FloatParts128 *c,
752                                                float_status *s,
753                                                int ab_mask, int abc_mask);
754 
755 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
756     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
757 
758 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
759                                  const FloatFmt *fmt);
760 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
761                                   const FloatFmt *fmt);
762 
763 #define parts_canonicalize(A, S, F) \
764     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
765 
766 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
767                                    const FloatFmt *fmt);
768 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
769                                     const FloatFmt *fmt);
770 
771 #define parts_uncanon_normal(A, S, F) \
772     PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
773 
774 static void parts64_uncanon(FloatParts64 *p, float_status *status,
775                             const FloatFmt *fmt);
776 static void parts128_uncanon(FloatParts128 *p, float_status *status,
777                              const FloatFmt *fmt);
778 
779 #define parts_uncanon(A, S, F) \
780     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
781 
782 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
783 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
784 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
785 
786 #define parts_add_normal(A, B) \
787     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
788 
789 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
790 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
791 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
792 
793 #define parts_sub_normal(A, B) \
794     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
795 
796 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
797                                     float_status *s, bool subtract);
798 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
799                                       float_status *s, bool subtract);
800 
801 #define parts_addsub(A, B, S, Z) \
802     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
803 
804 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
805                                  float_status *s);
806 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
807                                    float_status *s);
808 
809 #define parts_mul(A, B, S) \
810     PARTS_GENERIC_64_128(mul, A)(A, B, S)
811 
812 static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
813                                            FloatParts64 *c, int scale,
814                                            int flags, float_status *s);
815 static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
816                                              FloatParts128 *c, int scale,
817                                              int flags, float_status *s);
818 
819 #define parts_muladd_scalbn(A, B, C, Z, Y, S) \
820     PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)
821 
822 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
823                                  float_status *s);
824 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
825                                    float_status *s);
826 
827 #define parts_div(A, B, S) \
828     PARTS_GENERIC_64_128(div, A)(A, B, S)
829 
830 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
831                                     uint64_t *mod_quot, float_status *s);
832 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
833                                       uint64_t *mod_quot, float_status *s);
834 
835 #define parts_modrem(A, B, Q, S) \
836     PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
837 
838 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
839 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
840 
841 #define parts_sqrt(A, S, F) \
842     PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
843 
844 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
845                                         int scale, int frac_size);
846 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
847                                          int scale, int frac_size);
848 
849 #define parts_round_to_int_normal(A, R, C, F) \
850     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
851 
852 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
853                                  int scale, float_status *s,
854                                  const FloatFmt *fmt);
855 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
856                                   int scale, float_status *s,
857                                   const FloatFmt *fmt);
858 
859 #define parts_round_to_int(A, R, C, S, F) \
860     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
861 
862 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
863                                      int scale, int64_t min, int64_t max,
864                                      float_status *s);
865 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
866                                      int scale, int64_t min, int64_t max,
867                                      float_status *s);
868 
869 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
870     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
871 
872 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
873                                       int scale, uint64_t max,
874                                       float_status *s);
875 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
876                                        int scale, uint64_t max,
877                                        float_status *s);
878 
879 #define parts_float_to_uint(P, R, Z, M, S) \
880     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
881 
882 static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
883                                             FloatRoundMode rmode,
884                                             int bitsm1, float_status *s);
885 static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
886                                              FloatRoundMode rmode,
887                                              int bitsm1, float_status *s);
888 
889 #define parts_float_to_sint_modulo(P, R, M, S) \
890     PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)
891 
892 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
893                                   int scale, float_status *s);
894 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
895                                    int scale, float_status *s);
896 
897 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
898     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
899 
900 #define parts_sint_to_float(P, I, Z, S) \
901     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
902 
903 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
904                                   int scale, float_status *s);
905 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
906                                    int scale, float_status *s);
907 
908 #define parts_uint_to_float(P, I, Z, S) \
909     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
910 
911 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
912                                     float_status *s, int flags);
913 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
914                                       float_status *s, int flags);
915 
916 #define parts_minmax(A, B, S, F) \
917     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
918 
919 static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
920                                      float_status *s, bool q);
921 static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
922                                       float_status *s, bool q);
923 
924 #define parts_compare(A, B, S, Q) \
925     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
926 
927 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
928 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
929 
930 #define parts_scalbn(A, N, S) \
931     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
932 
933 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
934 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
935 
936 #define parts_log2(A, S, F) \
937     PARTS_GENERIC_64_128(log2, A)(A, S, F)
938 
939 /*
940  * Helper functions for softfloat-parts.c.inc, per-size operations.
941  */
942 
943 #define FRAC_GENERIC_64_128(NAME, P) \
944     _Generic((P), FloatParts64 *: frac64_##NAME, \
945                   FloatParts128 *: frac128_##NAME)
946 
947 #define FRAC_GENERIC_64_128_256(NAME, P) \
948     _Generic((P), FloatParts64 *: frac64_##NAME, \
949                   FloatParts128 *: frac128_##NAME, \
950                   FloatParts256 *: frac256_##NAME)
951 
952 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
953 {
954     return uadd64_overflow(a->frac, b->frac, &r->frac);
955 }
956 
957 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
958 {
959     bool c = 0;
960     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
961     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
962     return c;
963 }
964 
965 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
966 {
967     bool c = 0;
968     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
969     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
970     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
971     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
972     return c;
973 }
974 
975 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
976 
977 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
978 {
979     return uadd64_overflow(a->frac, c, &r->frac);
980 }
981 
982 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
983 {
984     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
985     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
986 }
987 
988 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
989 
990 static void frac64_allones(FloatParts64 *a)
991 {
992     a->frac = -1;
993 }
994 
995 static void frac128_allones(FloatParts128 *a)
996 {
997     a->frac_hi = a->frac_lo = -1;
998 }
999 
1000 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
1001 
1002 static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
1003 {
1004     return (a->frac == b->frac ? float_relation_equal
1005             : a->frac < b->frac ? float_relation_less
1006             : float_relation_greater);
1007 }
1008 
1009 static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
1010 {
1011     uint64_t ta = a->frac_hi, tb = b->frac_hi;
1012     if (ta == tb) {
1013         ta = a->frac_lo, tb = b->frac_lo;
1014         if (ta == tb) {
1015             return float_relation_equal;
1016         }
1017     }
1018     return ta < tb ? float_relation_less : float_relation_greater;
1019 }
1020 
1021 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
1022 
1023 static void frac64_clear(FloatParts64 *a)
1024 {
1025     a->frac = 0;
1026 }
1027 
1028 static void frac128_clear(FloatParts128 *a)
1029 {
1030     a->frac_hi = a->frac_lo = 0;
1031 }
1032 
1033 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
1034 
1035 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
1036 {
1037     uint64_t n1, n0, r, q;
1038     bool ret;
1039 
1040     /*
1041      * We want a 2*N / N-bit division to produce exactly an N-bit
1042      * result, so that we do not lose any precision and so that we
1043      * do not have to renormalize afterward.  If A.frac < B.frac,
1044      * then division would produce an (N-1)-bit result; shift A left
1045      * by one to produce the an N-bit result, and return true to
1046      * decrement the exponent to match.
1047      *
1048      * The udiv_qrnnd algorithm that we're using requires normalization,
1049      * i.e. the msb of the denominator must be set, which is already true.
1050      */
1051     ret = a->frac < b->frac;
1052     if (ret) {
1053         n0 = a->frac;
1054         n1 = 0;
1055     } else {
1056         n0 = a->frac >> 1;
1057         n1 = a->frac << 63;
1058     }
1059     q = udiv_qrnnd(&r, n0, n1, b->frac);
1060 
1061     /* Set lsb if there is a remainder, to set inexact. */
1062     a->frac = q | (r != 0);
1063 
1064     return ret;
1065 }
1066 
1067 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1068 {
1069     uint64_t q0, q1, a0, a1, b0, b1;
1070     uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1071     bool ret = false;
1072 
1073     a0 = a->frac_hi, a1 = a->frac_lo;
1074     b0 = b->frac_hi, b1 = b->frac_lo;
1075 
1076     ret = lt128(a0, a1, b0, b1);
1077     if (!ret) {
1078         a1 = shr_double(a0, a1, 1);
1079         a0 = a0 >> 1;
1080     }
1081 
1082     /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1083     q0 = estimateDiv128To64(a0, a1, b0);
1084 
1085     /*
1086      * Estimate is high because B1 was not included (unless B1 == 0).
1087      * Reduce quotient and increase remainder until remainder is non-negative.
1088      * This loop will execute 0 to 2 times.
1089      */
1090     mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1091     sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1092     while (r0 != 0) {
1093         q0--;
1094         add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1095     }
1096 
1097     /* Repeat using the remainder, producing a second word of quotient. */
1098     q1 = estimateDiv128To64(r1, r2, b0);
1099     mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1100     sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1101     while (r1 != 0) {
1102         q1--;
1103         add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1104     }
1105 
1106     /* Any remainder indicates inexact; set sticky bit. */
1107     q1 |= (r2 | r3) != 0;
1108 
1109     a->frac_hi = q0;
1110     a->frac_lo = q1;
1111     return ret;
1112 }
1113 
1114 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1115 
1116 static bool frac64_eqz(FloatParts64 *a)
1117 {
1118     return a->frac == 0;
1119 }
1120 
1121 static bool frac128_eqz(FloatParts128 *a)
1122 {
1123     return (a->frac_hi | a->frac_lo) == 0;
1124 }
1125 
1126 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1127 
1128 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1129 {
1130     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1131 }
1132 
1133 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1134 {
1135     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1136                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1137 }
1138 
1139 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1140 
1141 static void frac64_neg(FloatParts64 *a)
1142 {
1143     a->frac = -a->frac;
1144 }
1145 
1146 static void frac128_neg(FloatParts128 *a)
1147 {
1148     bool c = 0;
1149     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1150     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1151 }
1152 
1153 static void frac256_neg(FloatParts256 *a)
1154 {
1155     bool c = 0;
1156     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1157     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1158     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1159     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1160 }
1161 
1162 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1163 
1164 static int frac64_normalize(FloatParts64 *a)
1165 {
1166     if (a->frac) {
1167         int shift = clz64(a->frac);
1168         a->frac <<= shift;
1169         return shift;
1170     }
1171     return 64;
1172 }
1173 
1174 static int frac128_normalize(FloatParts128 *a)
1175 {
1176     if (a->frac_hi) {
1177         int shl = clz64(a->frac_hi);
1178         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1179         a->frac_lo <<= shl;
1180         return shl;
1181     } else if (a->frac_lo) {
1182         int shl = clz64(a->frac_lo);
1183         a->frac_hi = a->frac_lo << shl;
1184         a->frac_lo = 0;
1185         return shl + 64;
1186     }
1187     return 128;
1188 }
1189 
1190 static int frac256_normalize(FloatParts256 *a)
1191 {
1192     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1193     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1194     int ret, shl;
1195 
1196     if (likely(a0)) {
1197         shl = clz64(a0);
1198         if (shl == 0) {
1199             return 0;
1200         }
1201         ret = shl;
1202     } else {
1203         if (a1) {
1204             ret = 64;
1205             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1206         } else if (a2) {
1207             ret = 128;
1208             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1209         } else if (a3) {
1210             ret = 192;
1211             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1212         } else {
1213             ret = 256;
1214             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1215             goto done;
1216         }
1217         shl = clz64(a0);
1218         if (shl == 0) {
1219             goto done;
1220         }
1221         ret += shl;
1222     }
1223 
1224     a0 = shl_double(a0, a1, shl);
1225     a1 = shl_double(a1, a2, shl);
1226     a2 = shl_double(a2, a3, shl);
1227     a3 <<= shl;
1228 
1229  done:
1230     a->frac_hi = a0;
1231     a->frac_hm = a1;
1232     a->frac_lm = a2;
1233     a->frac_lo = a3;
1234     return ret;
1235 }
1236 
1237 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1238 
1239 static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1240 {
1241     uint64_t a0, a1, b0, t0, t1, q, quot;
1242     int exp_diff = a->exp - b->exp;
1243     int shift;
1244 
1245     a0 = a->frac;
1246     a1 = 0;
1247 
1248     if (exp_diff < -1) {
1249         if (mod_quot) {
1250             *mod_quot = 0;
1251         }
1252         return;
1253     }
1254     if (exp_diff == -1) {
1255         a0 >>= 1;
1256         exp_diff = 0;
1257     }
1258 
1259     b0 = b->frac;
1260     quot = q = b0 <= a0;
1261     if (q) {
1262         a0 -= b0;
1263     }
1264 
1265     exp_diff -= 64;
1266     while (exp_diff > 0) {
1267         q = estimateDiv128To64(a0, a1, b0);
1268         q = q > 2 ? q - 2 : 0;
1269         mul64To128(b0, q, &t0, &t1);
1270         sub128(a0, a1, t0, t1, &a0, &a1);
1271         shortShift128Left(a0, a1, 62, &a0, &a1);
1272         exp_diff -= 62;
1273         quot = (quot << 62) + q;
1274     }
1275 
1276     exp_diff += 64;
1277     if (exp_diff > 0) {
1278         q = estimateDiv128To64(a0, a1, b0);
1279         q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1280         mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1281         sub128(a0, a1, t0, t1, &a0, &a1);
1282         shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1283         while (le128(t0, t1, a0, a1)) {
1284             ++q;
1285             sub128(a0, a1, t0, t1, &a0, &a1);
1286         }
1287         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1288     } else {
1289         t0 = b0;
1290         t1 = 0;
1291     }
1292 
1293     if (mod_quot) {
1294         *mod_quot = quot;
1295     } else {
1296         sub128(t0, t1, a0, a1, &t0, &t1);
1297         if (lt128(t0, t1, a0, a1) ||
1298             (eq128(t0, t1, a0, a1) && (q & 1))) {
1299             a0 = t0;
1300             a1 = t1;
1301             a->sign = !a->sign;
1302         }
1303     }
1304 
1305     if (likely(a0)) {
1306         shift = clz64(a0);
1307         shortShift128Left(a0, a1, shift, &a0, &a1);
1308     } else if (likely(a1)) {
1309         shift = clz64(a1);
1310         a0 = a1 << shift;
1311         a1 = 0;
1312         shift += 64;
1313     } else {
1314         a->cls = float_class_zero;
1315         return;
1316     }
1317 
1318     a->exp = b->exp + exp_diff - shift;
1319     a->frac = a0 | (a1 != 0);
1320 }
1321 
1322 static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1323                            uint64_t *mod_quot)
1324 {
1325     uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1326     int exp_diff = a->exp - b->exp;
1327     int shift;
1328 
1329     a0 = a->frac_hi;
1330     a1 = a->frac_lo;
1331     a2 = 0;
1332 
1333     if (exp_diff < -1) {
1334         if (mod_quot) {
1335             *mod_quot = 0;
1336         }
1337         return;
1338     }
1339     if (exp_diff == -1) {
1340         shift128Right(a0, a1, 1, &a0, &a1);
1341         exp_diff = 0;
1342     }
1343 
1344     b0 = b->frac_hi;
1345     b1 = b->frac_lo;
1346 
1347     quot = q = le128(b0, b1, a0, a1);
1348     if (q) {
1349         sub128(a0, a1, b0, b1, &a0, &a1);
1350     }
1351 
1352     exp_diff -= 64;
1353     while (exp_diff > 0) {
1354         q = estimateDiv128To64(a0, a1, b0);
1355         q = q > 4 ? q - 4 : 0;
1356         mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1357         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1358         shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1359         exp_diff -= 61;
1360         quot = (quot << 61) + q;
1361     }
1362 
1363     exp_diff += 64;
1364     if (exp_diff > 0) {
1365         q = estimateDiv128To64(a0, a1, b0);
1366         q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1367         mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1368         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1369         shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1370         while (le192(t0, t1, t2, a0, a1, a2)) {
1371             ++q;
1372             sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1373         }
1374         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1375     } else {
1376         t0 = b0;
1377         t1 = b1;
1378         t2 = 0;
1379     }
1380 
1381     if (mod_quot) {
1382         *mod_quot = quot;
1383     } else {
1384         sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1385         if (lt192(t0, t1, t2, a0, a1, a2) ||
1386             (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1387             a0 = t0;
1388             a1 = t1;
1389             a2 = t2;
1390             a->sign = !a->sign;
1391         }
1392     }
1393 
1394     if (likely(a0)) {
1395         shift = clz64(a0);
1396         shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1397     } else if (likely(a1)) {
1398         shift = clz64(a1);
1399         shortShift128Left(a1, a2, shift, &a0, &a1);
1400         a2 = 0;
1401         shift += 64;
1402     } else if (likely(a2)) {
1403         shift = clz64(a2);
1404         a0 = a2 << shift;
1405         a1 = a2 = 0;
1406         shift += 128;
1407     } else {
1408         a->cls = float_class_zero;
1409         return;
1410     }
1411 
1412     a->exp = b->exp + exp_diff - shift;
1413     a->frac_hi = a0;
1414     a->frac_lo = a1 | (a2 != 0);
1415 }
1416 
1417 #define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1418 
1419 static void frac64_shl(FloatParts64 *a, int c)
1420 {
1421     a->frac <<= c;
1422 }
1423 
1424 static void frac128_shl(FloatParts128 *a, int c)
1425 {
1426     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1427 
1428     if (c & 64) {
1429         a0 = a1, a1 = 0;
1430     }
1431 
1432     c &= 63;
1433     if (c) {
1434         a0 = shl_double(a0, a1, c);
1435         a1 = a1 << c;
1436     }
1437 
1438     a->frac_hi = a0;
1439     a->frac_lo = a1;
1440 }
1441 
1442 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1443 
1444 static void frac64_shr(FloatParts64 *a, int c)
1445 {
1446     a->frac >>= c;
1447 }
1448 
1449 static void frac128_shr(FloatParts128 *a, int c)
1450 {
1451     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1452 
1453     if (c & 64) {
1454         a1 = a0, a0 = 0;
1455     }
1456 
1457     c &= 63;
1458     if (c) {
1459         a1 = shr_double(a0, a1, c);
1460         a0 = a0 >> c;
1461     }
1462 
1463     a->frac_hi = a0;
1464     a->frac_lo = a1;
1465 }
1466 
1467 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1468 
1469 static void frac64_shrjam(FloatParts64 *a, int c)
1470 {
1471     uint64_t a0 = a->frac;
1472 
1473     if (likely(c != 0)) {
1474         if (likely(c < 64)) {
1475             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1476         } else {
1477             a0 = a0 != 0;
1478         }
1479         a->frac = a0;
1480     }
1481 }
1482 
1483 static void frac128_shrjam(FloatParts128 *a, int c)
1484 {
1485     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1486     uint64_t sticky = 0;
1487 
1488     if (unlikely(c == 0)) {
1489         return;
1490     } else if (likely(c < 64)) {
1491         /* nothing */
1492     } else if (likely(c < 128)) {
1493         sticky = a1;
1494         a1 = a0;
1495         a0 = 0;
1496         c &= 63;
1497         if (c == 0) {
1498             goto done;
1499         }
1500     } else {
1501         sticky = a0 | a1;
1502         a0 = a1 = 0;
1503         goto done;
1504     }
1505 
1506     sticky |= shr_double(a1, 0, c);
1507     a1 = shr_double(a0, a1, c);
1508     a0 = a0 >> c;
1509 
1510  done:
1511     a->frac_lo = a1 | (sticky != 0);
1512     a->frac_hi = a0;
1513 }
1514 
1515 static void frac256_shrjam(FloatParts256 *a, int c)
1516 {
1517     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1518     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1519     uint64_t sticky = 0;
1520 
1521     if (unlikely(c == 0)) {
1522         return;
1523     } else if (likely(c < 64)) {
1524         /* nothing */
1525     } else if (likely(c < 256)) {
1526         if (unlikely(c & 128)) {
1527             sticky |= a2 | a3;
1528             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1529         }
1530         if (unlikely(c & 64)) {
1531             sticky |= a3;
1532             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1533         }
1534         c &= 63;
1535         if (c == 0) {
1536             goto done;
1537         }
1538     } else {
1539         sticky = a0 | a1 | a2 | a3;
1540         a0 = a1 = a2 = a3 = 0;
1541         goto done;
1542     }
1543 
1544     sticky |= shr_double(a3, 0, c);
1545     a3 = shr_double(a2, a3, c);
1546     a2 = shr_double(a1, a2, c);
1547     a1 = shr_double(a0, a1, c);
1548     a0 = a0 >> c;
1549 
1550  done:
1551     a->frac_lo = a3 | (sticky != 0);
1552     a->frac_lm = a2;
1553     a->frac_hm = a1;
1554     a->frac_hi = a0;
1555 }
1556 
1557 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1558 
1559 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1560 {
1561     return usub64_overflow(a->frac, b->frac, &r->frac);
1562 }
1563 
1564 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1565 {
1566     bool c = 0;
1567     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1568     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1569     return c;
1570 }
1571 
1572 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1573 {
1574     bool c = 0;
1575     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1576     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1577     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1578     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1579     return c;
1580 }
1581 
1582 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1583 
1584 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1585 {
1586     r->frac = a->frac_hi | (a->frac_lo != 0);
1587 }
1588 
1589 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1590 {
1591     r->frac_hi = a->frac_hi;
1592     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1593 }
1594 
1595 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1596 
1597 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1598 {
1599     r->frac_hi = a->frac;
1600     r->frac_lo = 0;
1601 }
1602 
1603 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1604 {
1605     r->frac_hi = a->frac_hi;
1606     r->frac_hm = a->frac_lo;
1607     r->frac_lm = 0;
1608     r->frac_lo = 0;
1609 }
1610 
1611 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1612 
1613 /*
1614  * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
1615  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1616  * and thus MIT licenced.
1617  */
1618 static const uint16_t rsqrt_tab[128] = {
1619     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1620     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1621     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1622     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1623     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1624     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1625     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1626     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1627     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1628     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1629     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1630     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1631     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1632     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1633     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1634     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1635 };
1636 
1637 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1638 #define FloatPartsN    glue(FloatParts,N)
1639 #define FloatPartsW    glue(FloatParts,W)
1640 
1641 #define N 64
1642 #define W 128
1643 
1644 #include "softfloat-parts-addsub.c.inc"
1645 #include "softfloat-parts.c.inc"
1646 
1647 #undef  N
1648 #undef  W
1649 #define N 128
1650 #define W 256
1651 
1652 #include "softfloat-parts-addsub.c.inc"
1653 #include "softfloat-parts.c.inc"
1654 
1655 #undef  N
1656 #undef  W
1657 #define N            256
1658 
1659 #include "softfloat-parts-addsub.c.inc"
1660 
1661 #undef  N
1662 #undef  W
1663 #undef  partsN
1664 #undef  FloatPartsN
1665 #undef  FloatPartsW
1666 
1667 /*
1668  * Pack/unpack routines with a specific FloatFmt.
1669  */
1670 
1671 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1672                                       float_status *s, const FloatFmt *params)
1673 {
1674     float16_unpack_raw(p, f);
1675     parts_canonicalize(p, s, params);
1676 }
1677 
1678 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1679                                      float_status *s)
1680 {
1681     float16a_unpack_canonical(p, f, s, &float16_params);
1682 }
1683 
1684 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1685                                       float_status *s)
1686 {
1687     bfloat16_unpack_raw(p, f);
1688     parts_canonicalize(p, s, &bfloat16_params);
1689 }
1690 
1691 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1692                                              float_status *s,
1693                                              const FloatFmt *params)
1694 {
1695     parts_uncanon(p, s, params);
1696     return float16_pack_raw(p);
1697 }
1698 
1699 static float16 float16_round_pack_canonical(FloatParts64 *p,
1700                                             float_status *s)
1701 {
1702     return float16a_round_pack_canonical(p, s, &float16_params);
1703 }
1704 
1705 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1706                                               float_status *s)
1707 {
1708     parts_uncanon(p, s, &bfloat16_params);
1709     return bfloat16_pack_raw(p);
1710 }
1711 
1712 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1713                                      float_status *s)
1714 {
1715     float32_unpack_raw(p, f);
1716     parts_canonicalize(p, s, &float32_params);
1717 }
1718 
1719 static float32 float32_round_pack_canonical(FloatParts64 *p,
1720                                             float_status *s)
1721 {
1722     parts_uncanon(p, s, &float32_params);
1723     return float32_pack_raw(p);
1724 }
1725 
1726 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1727                                      float_status *s)
1728 {
1729     float64_unpack_raw(p, f);
1730     parts_canonicalize(p, s, &float64_params);
1731 }
1732 
1733 static float64 float64_round_pack_canonical(FloatParts64 *p,
1734                                             float_status *s)
1735 {
1736     parts_uncanon(p, s, &float64_params);
1737     return float64_pack_raw(p);
1738 }
1739 
1740 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1741                                                float_status *s)
1742 {
1743     parts_uncanon(p, s, &float32_params);
1744 
1745     /*
1746      * In parts_uncanon, we placed the fraction for float32 at the lsb.
1747      * We need to adjust the fraction higher so that the least N bits are
1748      * zero, and the fraction is adjacent to the float64 implicit bit.
1749      */
1750     switch (p->cls) {
1751     case float_class_normal:
1752     case float_class_denormal:
1753         if (unlikely(p->exp == 0)) {
1754             /*
1755              * The result is denormal for float32, but can be represented
1756              * in normalized form for float64.  Adjust, per canonicalize.
1757              */
1758             int shift = frac_normalize(p);
1759             p->exp = (float32_params.frac_shift -
1760                       float32_params.exp_bias - shift + 1 +
1761                       float64_params.exp_bias);
1762             frac_shr(p, float64_params.frac_shift);
1763         } else {
1764             frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1765             p->exp += float64_params.exp_bias - float32_params.exp_bias;
1766         }
1767         break;
1768     case float_class_snan:
1769     case float_class_qnan:
1770         frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1771         p->exp = float64_params.exp_max;
1772         break;
1773     case float_class_inf:
1774         p->exp = float64_params.exp_max;
1775         break;
1776     case float_class_zero:
1777         break;
1778     default:
1779         g_assert_not_reached();
1780     }
1781 
1782     return float64_pack_raw(p);
1783 }
1784 
1785 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1786                                       float_status *s)
1787 {
1788     float128_unpack_raw(p, f);
1789     parts_canonicalize(p, s, &float128_params);
1790 }
1791 
1792 static float128 float128_round_pack_canonical(FloatParts128 *p,
1793                                               float_status *s)
1794 {
1795     parts_uncanon(p, s, &float128_params);
1796     return float128_pack_raw(p);
1797 }
1798 
1799 /* Returns false if the encoding is invalid. */
1800 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1801                                       float_status *s)
1802 {
1803     /* Ensure rounding precision is set before beginning. */
1804     switch (s->floatx80_rounding_precision) {
1805     case floatx80_precision_x:
1806     case floatx80_precision_d:
1807     case floatx80_precision_s:
1808         break;
1809     default:
1810         g_assert_not_reached();
1811     }
1812 
1813     if (unlikely(floatx80_invalid_encoding(f))) {
1814         float_raise(float_flag_invalid, s);
1815         return false;
1816     }
1817 
1818     floatx80_unpack_raw(p, f);
1819 
1820     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1821         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1822     } else {
1823         /* The explicit integer bit is ignored, after invalid checks. */
1824         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1825         p->cls = (p->frac_hi == 0 ? float_class_inf
1826                   : parts_is_snan_frac(p->frac_hi, s)
1827                   ? float_class_snan : float_class_qnan);
1828     }
1829     return true;
1830 }
1831 
1832 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1833                                               float_status *s)
1834 {
1835     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1836     uint64_t frac;
1837     int exp;
1838 
1839     switch (p->cls) {
1840     case float_class_normal:
1841     case float_class_denormal:
1842         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1843             parts_uncanon_normal(p, s, fmt);
1844             frac = p->frac_hi;
1845             exp = p->exp;
1846         } else {
1847             FloatParts64 p64;
1848 
1849             p64.sign = p->sign;
1850             p64.exp = p->exp;
1851             frac_truncjam(&p64, p);
1852             parts_uncanon_normal(&p64, s, fmt);
1853             frac = p64.frac;
1854             exp = p64.exp;
1855         }
1856         if (exp != fmt->exp_max) {
1857             break;
1858         }
1859         /* rounded to inf -- fall through to set frac correctly */
1860 
1861     case float_class_inf:
1862         /* x86 and m68k differ in the setting of the integer bit. */
1863         frac = floatx80_infinity_low;
1864         exp = fmt->exp_max;
1865         break;
1866 
1867     case float_class_zero:
1868         frac = 0;
1869         exp = 0;
1870         break;
1871 
1872     case float_class_snan:
1873     case float_class_qnan:
1874         /* NaNs have the integer bit set. */
1875         frac = p->frac_hi | (1ull << 63);
1876         exp = fmt->exp_max;
1877         break;
1878 
1879     default:
1880         g_assert_not_reached();
1881     }
1882 
1883     return packFloatx80(p->sign, exp, frac);
1884 }
1885 
1886 /*
1887  * Addition and subtraction
1888  */
1889 
1890 static float16 QEMU_FLATTEN
1891 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1892 {
1893     FloatParts64 pa, pb, *pr;
1894 
1895     float16_unpack_canonical(&pa, a, status);
1896     float16_unpack_canonical(&pb, b, status);
1897     pr = parts_addsub(&pa, &pb, status, subtract);
1898 
1899     return float16_round_pack_canonical(pr, status);
1900 }
1901 
1902 float16 float16_add(float16 a, float16 b, float_status *status)
1903 {
1904     return float16_addsub(a, b, status, false);
1905 }
1906 
1907 float16 float16_sub(float16 a, float16 b, float_status *status)
1908 {
1909     return float16_addsub(a, b, status, true);
1910 }
1911 
1912 static float32 QEMU_SOFTFLOAT_ATTR
1913 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1914 {
1915     FloatParts64 pa, pb, *pr;
1916 
1917     float32_unpack_canonical(&pa, a, status);
1918     float32_unpack_canonical(&pb, b, status);
1919     pr = parts_addsub(&pa, &pb, status, subtract);
1920 
1921     return float32_round_pack_canonical(pr, status);
1922 }
1923 
1924 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1925 {
1926     return soft_f32_addsub(a, b, status, false);
1927 }
1928 
1929 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1930 {
1931     return soft_f32_addsub(a, b, status, true);
1932 }
1933 
1934 static float64 QEMU_SOFTFLOAT_ATTR
1935 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1936 {
1937     FloatParts64 pa, pb, *pr;
1938 
1939     float64_unpack_canonical(&pa, a, status);
1940     float64_unpack_canonical(&pb, b, status);
1941     pr = parts_addsub(&pa, &pb, status, subtract);
1942 
1943     return float64_round_pack_canonical(pr, status);
1944 }
1945 
1946 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1947 {
1948     return soft_f64_addsub(a, b, status, false);
1949 }
1950 
1951 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1952 {
1953     return soft_f64_addsub(a, b, status, true);
1954 }
1955 
1956 static float hard_f32_add(float a, float b)
1957 {
1958     return a + b;
1959 }
1960 
1961 static float hard_f32_sub(float a, float b)
1962 {
1963     return a - b;
1964 }
1965 
1966 static double hard_f64_add(double a, double b)
1967 {
1968     return a + b;
1969 }
1970 
1971 static double hard_f64_sub(double a, double b)
1972 {
1973     return a - b;
1974 }
1975 
1976 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1977 {
1978     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1979         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1980     }
1981     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1982 }
1983 
1984 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1985 {
1986     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1987         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1988     } else {
1989         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1990     }
1991 }
1992 
1993 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1994                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1995 {
1996     return float32_gen2(a, b, s, hard, soft,
1997                         f32_is_zon2, f32_addsubmul_post);
1998 }
1999 
2000 static float64 float64_addsub(float64 a, float64 b, float_status *s,
2001                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
2002 {
2003     return float64_gen2(a, b, s, hard, soft,
2004                         f64_is_zon2, f64_addsubmul_post);
2005 }
2006 
2007 float32 QEMU_FLATTEN
2008 float32_add(float32 a, float32 b, float_status *s)
2009 {
2010     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
2011 }
2012 
2013 float32 QEMU_FLATTEN
2014 float32_sub(float32 a, float32 b, float_status *s)
2015 {
2016     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
2017 }
2018 
2019 float64 QEMU_FLATTEN
2020 float64_add(float64 a, float64 b, float_status *s)
2021 {
2022     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
2023 }
2024 
2025 float64 QEMU_FLATTEN
2026 float64_sub(float64 a, float64 b, float_status *s)
2027 {
2028     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
2029 }
2030 
2031 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
2032                                  bool subtract)
2033 {
2034     FloatParts64 pa, pb, *pr;
2035 
2036     float64_unpack_canonical(&pa, a, status);
2037     float64_unpack_canonical(&pb, b, status);
2038     pr = parts_addsub(&pa, &pb, status, subtract);
2039 
2040     return float64r32_round_pack_canonical(pr, status);
2041 }
2042 
2043 float64 float64r32_add(float64 a, float64 b, float_status *status)
2044 {
2045     return float64r32_addsub(a, b, status, false);
2046 }
2047 
2048 float64 float64r32_sub(float64 a, float64 b, float_status *status)
2049 {
2050     return float64r32_addsub(a, b, status, true);
2051 }
2052 
2053 static bfloat16 QEMU_FLATTEN
2054 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2055 {
2056     FloatParts64 pa, pb, *pr;
2057 
2058     bfloat16_unpack_canonical(&pa, a, status);
2059     bfloat16_unpack_canonical(&pb, b, status);
2060     pr = parts_addsub(&pa, &pb, status, subtract);
2061 
2062     return bfloat16_round_pack_canonical(pr, status);
2063 }
2064 
2065 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2066 {
2067     return bfloat16_addsub(a, b, status, false);
2068 }
2069 
2070 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2071 {
2072     return bfloat16_addsub(a, b, status, true);
2073 }
2074 
2075 static float128 QEMU_FLATTEN
2076 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2077 {
2078     FloatParts128 pa, pb, *pr;
2079 
2080     float128_unpack_canonical(&pa, a, status);
2081     float128_unpack_canonical(&pb, b, status);
2082     pr = parts_addsub(&pa, &pb, status, subtract);
2083 
2084     return float128_round_pack_canonical(pr, status);
2085 }
2086 
2087 float128 float128_add(float128 a, float128 b, float_status *status)
2088 {
2089     return float128_addsub(a, b, status, false);
2090 }
2091 
2092 float128 float128_sub(float128 a, float128 b, float_status *status)
2093 {
2094     return float128_addsub(a, b, status, true);
2095 }
2096 
2097 static floatx80 QEMU_FLATTEN
2098 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2099 {
2100     FloatParts128 pa, pb, *pr;
2101 
2102     if (!floatx80_unpack_canonical(&pa, a, status) ||
2103         !floatx80_unpack_canonical(&pb, b, status)) {
2104         return floatx80_default_nan(status);
2105     }
2106 
2107     pr = parts_addsub(&pa, &pb, status, subtract);
2108     return floatx80_round_pack_canonical(pr, status);
2109 }
2110 
2111 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2112 {
2113     return floatx80_addsub(a, b, status, false);
2114 }
2115 
2116 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2117 {
2118     return floatx80_addsub(a, b, status, true);
2119 }
2120 
2121 /*
2122  * Multiplication
2123  */
2124 
2125 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2126 {
2127     FloatParts64 pa, pb, *pr;
2128 
2129     float16_unpack_canonical(&pa, a, status);
2130     float16_unpack_canonical(&pb, b, status);
2131     pr = parts_mul(&pa, &pb, status);
2132 
2133     return float16_round_pack_canonical(pr, status);
2134 }
2135 
2136 static float32 QEMU_SOFTFLOAT_ATTR
2137 soft_f32_mul(float32 a, float32 b, float_status *status)
2138 {
2139     FloatParts64 pa, pb, *pr;
2140 
2141     float32_unpack_canonical(&pa, a, status);
2142     float32_unpack_canonical(&pb, b, status);
2143     pr = parts_mul(&pa, &pb, status);
2144 
2145     return float32_round_pack_canonical(pr, status);
2146 }
2147 
2148 static float64 QEMU_SOFTFLOAT_ATTR
2149 soft_f64_mul(float64 a, float64 b, float_status *status)
2150 {
2151     FloatParts64 pa, pb, *pr;
2152 
2153     float64_unpack_canonical(&pa, a, status);
2154     float64_unpack_canonical(&pb, b, status);
2155     pr = parts_mul(&pa, &pb, status);
2156 
2157     return float64_round_pack_canonical(pr, status);
2158 }
2159 
2160 static float hard_f32_mul(float a, float b)
2161 {
2162     return a * b;
2163 }
2164 
2165 static double hard_f64_mul(double a, double b)
2166 {
2167     return a * b;
2168 }
2169 
2170 float32 QEMU_FLATTEN
2171 float32_mul(float32 a, float32 b, float_status *s)
2172 {
2173     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2174                         f32_is_zon2, f32_addsubmul_post);
2175 }
2176 
2177 float64 QEMU_FLATTEN
2178 float64_mul(float64 a, float64 b, float_status *s)
2179 {
2180     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2181                         f64_is_zon2, f64_addsubmul_post);
2182 }
2183 
2184 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2185 {
2186     FloatParts64 pa, pb, *pr;
2187 
2188     float64_unpack_canonical(&pa, a, status);
2189     float64_unpack_canonical(&pb, b, status);
2190     pr = parts_mul(&pa, &pb, status);
2191 
2192     return float64r32_round_pack_canonical(pr, status);
2193 }
2194 
2195 bfloat16 QEMU_FLATTEN
2196 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2197 {
2198     FloatParts64 pa, pb, *pr;
2199 
2200     bfloat16_unpack_canonical(&pa, a, status);
2201     bfloat16_unpack_canonical(&pb, b, status);
2202     pr = parts_mul(&pa, &pb, status);
2203 
2204     return bfloat16_round_pack_canonical(pr, status);
2205 }
2206 
2207 float128 QEMU_FLATTEN
2208 float128_mul(float128 a, float128 b, float_status *status)
2209 {
2210     FloatParts128 pa, pb, *pr;
2211 
2212     float128_unpack_canonical(&pa, a, status);
2213     float128_unpack_canonical(&pb, b, status);
2214     pr = parts_mul(&pa, &pb, status);
2215 
2216     return float128_round_pack_canonical(pr, status);
2217 }
2218 
2219 floatx80 QEMU_FLATTEN
2220 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2221 {
2222     FloatParts128 pa, pb, *pr;
2223 
2224     if (!floatx80_unpack_canonical(&pa, a, status) ||
2225         !floatx80_unpack_canonical(&pb, b, status)) {
2226         return floatx80_default_nan(status);
2227     }
2228 
2229     pr = parts_mul(&pa, &pb, status);
2230     return floatx80_round_pack_canonical(pr, status);
2231 }
2232 
2233 /*
2234  * Fused multiply-add
2235  */
2236 
2237 float16 QEMU_FLATTEN
2238 float16_muladd_scalbn(float16 a, float16 b, float16 c,
2239                       int scale, int flags, float_status *status)
2240 {
2241     FloatParts64 pa, pb, pc, *pr;
2242 
2243     float16_unpack_canonical(&pa, a, status);
2244     float16_unpack_canonical(&pb, b, status);
2245     float16_unpack_canonical(&pc, c, status);
2246     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2247 
2248     return float16_round_pack_canonical(pr, status);
2249 }
2250 
2251 float16 float16_muladd(float16 a, float16 b, float16 c,
2252                        int flags, float_status *status)
2253 {
2254     return float16_muladd_scalbn(a, b, c, 0, flags, status);
2255 }
2256 
2257 float32 QEMU_SOFTFLOAT_ATTR
2258 float32_muladd_scalbn(float32 a, float32 b, float32 c,
2259                       int scale, int flags, float_status *status)
2260 {
2261     FloatParts64 pa, pb, pc, *pr;
2262 
2263     float32_unpack_canonical(&pa, a, status);
2264     float32_unpack_canonical(&pb, b, status);
2265     float32_unpack_canonical(&pc, c, status);
2266     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2267 
2268     return float32_round_pack_canonical(pr, status);
2269 }
2270 
2271 float64 QEMU_SOFTFLOAT_ATTR
2272 float64_muladd_scalbn(float64 a, float64 b, float64 c,
2273                       int scale, int flags, float_status *status)
2274 {
2275     FloatParts64 pa, pb, pc, *pr;
2276 
2277     float64_unpack_canonical(&pa, a, status);
2278     float64_unpack_canonical(&pb, b, status);
2279     float64_unpack_canonical(&pc, c, status);
2280     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2281 
2282     return float64_round_pack_canonical(pr, status);
2283 }
2284 
2285 static bool force_soft_fma;
2286 
2287 float32 QEMU_FLATTEN
2288 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2289 {
2290     union_float32 ua, ub, uc, ur;
2291 
2292     ua.s = xa;
2293     ub.s = xb;
2294     uc.s = xc;
2295 
2296     if (unlikely(!can_use_fpu(s))) {
2297         goto soft;
2298     }
2299     if (unlikely(flags & float_muladd_suppress_add_product_zero)) {
2300         goto soft;
2301     }
2302 
2303     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2304     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2305         goto soft;
2306     }
2307 
2308     if (unlikely(force_soft_fma)) {
2309         goto soft;
2310     }
2311 
2312     /*
2313      * When (a || b) == 0, there's no need to check for under/over flow,
2314      * since we know the addend is (normal || 0) and the product is 0.
2315      */
2316     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2317         union_float32 up;
2318         bool prod_sign;
2319 
2320         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2321         prod_sign ^= !!(flags & float_muladd_negate_product);
2322         up.s = float32_set_sign(float32_zero, prod_sign);
2323 
2324         if (flags & float_muladd_negate_c) {
2325             uc.h = -uc.h;
2326         }
2327         ur.h = up.h + uc.h;
2328     } else {
2329         union_float32 ua_orig = ua;
2330         union_float32 uc_orig = uc;
2331 
2332         if (flags & float_muladd_negate_product) {
2333             ua.h = -ua.h;
2334         }
2335         if (flags & float_muladd_negate_c) {
2336             uc.h = -uc.h;
2337         }
2338 
2339         ur.h = fmaf(ua.h, ub.h, uc.h);
2340 
2341         if (unlikely(f32_is_inf(ur))) {
2342             float_raise(float_flag_overflow, s);
2343         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2344             ua = ua_orig;
2345             uc = uc_orig;
2346             goto soft;
2347         }
2348     }
2349     if (flags & float_muladd_negate_result) {
2350         return float32_chs(ur.s);
2351     }
2352     return ur.s;
2353 
2354  soft:
2355     return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
2356 }
2357 
2358 float64 QEMU_FLATTEN
2359 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2360 {
2361     union_float64 ua, ub, uc, ur;
2362 
2363     ua.s = xa;
2364     ub.s = xb;
2365     uc.s = xc;
2366 
2367     if (unlikely(!can_use_fpu(s))) {
2368         goto soft;
2369     }
2370 
2371     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2372     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2373         goto soft;
2374     }
2375 
2376     if (unlikely(force_soft_fma)) {
2377         goto soft;
2378     }
2379 
2380     /*
2381      * When (a || b) == 0, there's no need to check for under/over flow,
2382      * since we know the addend is (normal || 0) and the product is 0.
2383      */
2384     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2385         union_float64 up;
2386         bool prod_sign;
2387 
2388         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2389         prod_sign ^= !!(flags & float_muladd_negate_product);
2390         up.s = float64_set_sign(float64_zero, prod_sign);
2391 
2392         if (flags & float_muladd_negate_c) {
2393             uc.h = -uc.h;
2394         }
2395         ur.h = up.h + uc.h;
2396     } else {
2397         union_float64 ua_orig = ua;
2398         union_float64 uc_orig = uc;
2399 
2400         if (flags & float_muladd_negate_product) {
2401             ua.h = -ua.h;
2402         }
2403         if (flags & float_muladd_negate_c) {
2404             uc.h = -uc.h;
2405         }
2406 
2407         ur.h = fma(ua.h, ub.h, uc.h);
2408 
2409         if (unlikely(f64_is_inf(ur))) {
2410             float_raise(float_flag_overflow, s);
2411         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2412             ua = ua_orig;
2413             uc = uc_orig;
2414             goto soft;
2415         }
2416     }
2417     if (flags & float_muladd_negate_result) {
2418         return float64_chs(ur.s);
2419     }
2420     return ur.s;
2421 
2422  soft:
2423     return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
2424 }
2425 
2426 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2427                           int flags, float_status *status)
2428 {
2429     FloatParts64 pa, pb, pc, *pr;
2430 
2431     float64_unpack_canonical(&pa, a, status);
2432     float64_unpack_canonical(&pb, b, status);
2433     float64_unpack_canonical(&pc, c, status);
2434     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2435 
2436     return float64r32_round_pack_canonical(pr, status);
2437 }
2438 
2439 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2440                                       int flags, float_status *status)
2441 {
2442     FloatParts64 pa, pb, pc, *pr;
2443 
2444     bfloat16_unpack_canonical(&pa, a, status);
2445     bfloat16_unpack_canonical(&pb, b, status);
2446     bfloat16_unpack_canonical(&pc, c, status);
2447     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2448 
2449     return bfloat16_round_pack_canonical(pr, status);
2450 }
2451 
2452 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2453                                       int flags, float_status *status)
2454 {
2455     FloatParts128 pa, pb, pc, *pr;
2456 
2457     float128_unpack_canonical(&pa, a, status);
2458     float128_unpack_canonical(&pb, b, status);
2459     float128_unpack_canonical(&pc, c, status);
2460     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2461 
2462     return float128_round_pack_canonical(pr, status);
2463 }
2464 
2465 /*
2466  * Division
2467  */
2468 
2469 float16 float16_div(float16 a, float16 b, float_status *status)
2470 {
2471     FloatParts64 pa, pb, *pr;
2472 
2473     float16_unpack_canonical(&pa, a, status);
2474     float16_unpack_canonical(&pb, b, status);
2475     pr = parts_div(&pa, &pb, status);
2476 
2477     return float16_round_pack_canonical(pr, status);
2478 }
2479 
2480 static float32 QEMU_SOFTFLOAT_ATTR
2481 soft_f32_div(float32 a, float32 b, float_status *status)
2482 {
2483     FloatParts64 pa, pb, *pr;
2484 
2485     float32_unpack_canonical(&pa, a, status);
2486     float32_unpack_canonical(&pb, b, status);
2487     pr = parts_div(&pa, &pb, status);
2488 
2489     return float32_round_pack_canonical(pr, status);
2490 }
2491 
2492 static float64 QEMU_SOFTFLOAT_ATTR
2493 soft_f64_div(float64 a, float64 b, float_status *status)
2494 {
2495     FloatParts64 pa, pb, *pr;
2496 
2497     float64_unpack_canonical(&pa, a, status);
2498     float64_unpack_canonical(&pb, b, status);
2499     pr = parts_div(&pa, &pb, status);
2500 
2501     return float64_round_pack_canonical(pr, status);
2502 }
2503 
2504 static float hard_f32_div(float a, float b)
2505 {
2506     return a / b;
2507 }
2508 
2509 static double hard_f64_div(double a, double b)
2510 {
2511     return a / b;
2512 }
2513 
2514 static bool f32_div_pre(union_float32 a, union_float32 b)
2515 {
2516     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2517         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2518                fpclassify(b.h) == FP_NORMAL;
2519     }
2520     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2521 }
2522 
2523 static bool f64_div_pre(union_float64 a, union_float64 b)
2524 {
2525     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2526         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2527                fpclassify(b.h) == FP_NORMAL;
2528     }
2529     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2530 }
2531 
2532 static bool f32_div_post(union_float32 a, union_float32 b)
2533 {
2534     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2535         return fpclassify(a.h) != FP_ZERO;
2536     }
2537     return !float32_is_zero(a.s);
2538 }
2539 
2540 static bool f64_div_post(union_float64 a, union_float64 b)
2541 {
2542     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2543         return fpclassify(a.h) != FP_ZERO;
2544     }
2545     return !float64_is_zero(a.s);
2546 }
2547 
2548 float32 QEMU_FLATTEN
2549 float32_div(float32 a, float32 b, float_status *s)
2550 {
2551     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2552                         f32_div_pre, f32_div_post);
2553 }
2554 
2555 float64 QEMU_FLATTEN
2556 float64_div(float64 a, float64 b, float_status *s)
2557 {
2558     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2559                         f64_div_pre, f64_div_post);
2560 }
2561 
2562 float64 float64r32_div(float64 a, float64 b, float_status *status)
2563 {
2564     FloatParts64 pa, pb, *pr;
2565 
2566     float64_unpack_canonical(&pa, a, status);
2567     float64_unpack_canonical(&pb, b, status);
2568     pr = parts_div(&pa, &pb, status);
2569 
2570     return float64r32_round_pack_canonical(pr, status);
2571 }
2572 
2573 bfloat16 QEMU_FLATTEN
2574 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2575 {
2576     FloatParts64 pa, pb, *pr;
2577 
2578     bfloat16_unpack_canonical(&pa, a, status);
2579     bfloat16_unpack_canonical(&pb, b, status);
2580     pr = parts_div(&pa, &pb, status);
2581 
2582     return bfloat16_round_pack_canonical(pr, status);
2583 }
2584 
2585 float128 QEMU_FLATTEN
2586 float128_div(float128 a, float128 b, float_status *status)
2587 {
2588     FloatParts128 pa, pb, *pr;
2589 
2590     float128_unpack_canonical(&pa, a, status);
2591     float128_unpack_canonical(&pb, b, status);
2592     pr = parts_div(&pa, &pb, status);
2593 
2594     return float128_round_pack_canonical(pr, status);
2595 }
2596 
2597 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2598 {
2599     FloatParts128 pa, pb, *pr;
2600 
2601     if (!floatx80_unpack_canonical(&pa, a, status) ||
2602         !floatx80_unpack_canonical(&pb, b, status)) {
2603         return floatx80_default_nan(status);
2604     }
2605 
2606     pr = parts_div(&pa, &pb, status);
2607     return floatx80_round_pack_canonical(pr, status);
2608 }
2609 
2610 /*
2611  * Remainder
2612  */
2613 
2614 float32 float32_rem(float32 a, float32 b, float_status *status)
2615 {
2616     FloatParts64 pa, pb, *pr;
2617 
2618     float32_unpack_canonical(&pa, a, status);
2619     float32_unpack_canonical(&pb, b, status);
2620     pr = parts_modrem(&pa, &pb, NULL, status);
2621 
2622     return float32_round_pack_canonical(pr, status);
2623 }
2624 
2625 float64 float64_rem(float64 a, float64 b, float_status *status)
2626 {
2627     FloatParts64 pa, pb, *pr;
2628 
2629     float64_unpack_canonical(&pa, a, status);
2630     float64_unpack_canonical(&pb, b, status);
2631     pr = parts_modrem(&pa, &pb, NULL, status);
2632 
2633     return float64_round_pack_canonical(pr, status);
2634 }
2635 
2636 float128 float128_rem(float128 a, float128 b, float_status *status)
2637 {
2638     FloatParts128 pa, pb, *pr;
2639 
2640     float128_unpack_canonical(&pa, a, status);
2641     float128_unpack_canonical(&pb, b, status);
2642     pr = parts_modrem(&pa, &pb, NULL, status);
2643 
2644     return float128_round_pack_canonical(pr, status);
2645 }
2646 
2647 /*
2648  * Returns the remainder of the extended double-precision floating-point value
2649  * `a' with respect to the corresponding value `b'.
2650  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2651  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2652  * the remainder based on truncating the quotient toward zero instead and
2653  * *quotient is set to the low 64 bits of the absolute value of the integer
2654  * quotient.
2655  */
2656 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2657                          uint64_t *quotient, float_status *status)
2658 {
2659     FloatParts128 pa, pb, *pr;
2660 
2661     *quotient = 0;
2662     if (!floatx80_unpack_canonical(&pa, a, status) ||
2663         !floatx80_unpack_canonical(&pb, b, status)) {
2664         return floatx80_default_nan(status);
2665     }
2666     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2667 
2668     return floatx80_round_pack_canonical(pr, status);
2669 }
2670 
2671 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2672 {
2673     uint64_t quotient;
2674     return floatx80_modrem(a, b, false, &quotient, status);
2675 }
2676 
2677 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2678 {
2679     uint64_t quotient;
2680     return floatx80_modrem(a, b, true, &quotient, status);
2681 }
2682 
2683 /*
2684  * Float to Float conversions
2685  *
2686  * Returns the result of converting one float format to another. The
2687  * conversion is performed according to the IEC/IEEE Standard for
2688  * Binary Floating-Point Arithmetic.
2689  *
2690  * Usually this only needs to take care of raising invalid exceptions
2691  * and handling the conversion on NaNs.
2692  */
2693 
2694 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2695 {
2696     switch (a->cls) {
2697     case float_class_snan:
2698         float_raise(float_flag_invalid_snan, s);
2699         /* fall through */
2700     case float_class_qnan:
2701         /*
2702          * There is no NaN in the destination format.  Raise Invalid
2703          * and return a zero with the sign of the input NaN.
2704          */
2705         float_raise(float_flag_invalid, s);
2706         a->cls = float_class_zero;
2707         break;
2708 
2709     case float_class_inf:
2710         /*
2711          * There is no Inf in the destination format.  Raise Invalid
2712          * and return the maximum normal with the correct sign.
2713          */
2714         float_raise(float_flag_invalid, s);
2715         a->cls = float_class_normal;
2716         a->exp = float16_params_ahp.exp_max;
2717         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2718                                   float16_params_ahp.frac_size + 1);
2719         break;
2720 
2721     case float_class_normal:
2722     case float_class_denormal:
2723     case float_class_zero:
2724         break;
2725 
2726     default:
2727         g_assert_not_reached();
2728     }
2729 }
2730 
2731 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2732 {
2733     if (is_nan(a->cls)) {
2734         parts_return_nan(a, s);
2735     }
2736 }
2737 
2738 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2739 {
2740     if (is_nan(a->cls)) {
2741         parts_return_nan(a, s);
2742     }
2743 }
2744 
2745 #define parts_float_to_float(P, S) \
2746     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2747 
2748 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2749                                         float_status *s)
2750 {
2751     a->cls = b->cls;
2752     a->sign = b->sign;
2753     a->exp = b->exp;
2754 
2755     if (is_anynorm(a->cls)) {
2756         frac_truncjam(a, b);
2757     } else if (is_nan(a->cls)) {
2758         /* Discard the low bits of the NaN. */
2759         a->frac = b->frac_hi;
2760         parts_return_nan(a, s);
2761     }
2762 }
2763 
2764 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2765                                        float_status *s)
2766 {
2767     a->cls = b->cls;
2768     a->sign = b->sign;
2769     a->exp = b->exp;
2770     frac_widen(a, b);
2771 
2772     if (is_nan(a->cls)) {
2773         parts_return_nan(a, s);
2774     }
2775 }
2776 
2777 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2778 {
2779     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2780     FloatParts64 p;
2781 
2782     float16a_unpack_canonical(&p, a, s, fmt16);
2783     parts_float_to_float(&p, s);
2784     return float32_round_pack_canonical(&p, s);
2785 }
2786 
2787 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2788 {
2789     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2790     FloatParts64 p;
2791 
2792     float16a_unpack_canonical(&p, a, s, fmt16);
2793     parts_float_to_float(&p, s);
2794     return float64_round_pack_canonical(&p, s);
2795 }
2796 
2797 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2798 {
2799     FloatParts64 p;
2800     const FloatFmt *fmt;
2801 
2802     float32_unpack_canonical(&p, a, s);
2803     if (ieee) {
2804         parts_float_to_float(&p, s);
2805         fmt = &float16_params;
2806     } else {
2807         parts_float_to_ahp(&p, s);
2808         fmt = &float16_params_ahp;
2809     }
2810     return float16a_round_pack_canonical(&p, s, fmt);
2811 }
2812 
2813 static float64 QEMU_SOFTFLOAT_ATTR
2814 soft_float32_to_float64(float32 a, float_status *s)
2815 {
2816     FloatParts64 p;
2817 
2818     float32_unpack_canonical(&p, a, s);
2819     parts_float_to_float(&p, s);
2820     return float64_round_pack_canonical(&p, s);
2821 }
2822 
2823 float64 float32_to_float64(float32 a, float_status *s)
2824 {
2825     if (likely(float32_is_normal(a))) {
2826         /* Widening conversion can never produce inexact results.  */
2827         union_float32 uf;
2828         union_float64 ud;
2829         uf.s = a;
2830         ud.h = uf.h;
2831         return ud.s;
2832     } else if (float32_is_zero(a)) {
2833         return float64_set_sign(float64_zero, float32_is_neg(a));
2834     } else {
2835         return soft_float32_to_float64(a, s);
2836     }
2837 }
2838 
2839 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2840 {
2841     FloatParts64 p;
2842     const FloatFmt *fmt;
2843 
2844     float64_unpack_canonical(&p, a, s);
2845     if (ieee) {
2846         parts_float_to_float(&p, s);
2847         fmt = &float16_params;
2848     } else {
2849         parts_float_to_ahp(&p, s);
2850         fmt = &float16_params_ahp;
2851     }
2852     return float16a_round_pack_canonical(&p, s, fmt);
2853 }
2854 
2855 float32 float64_to_float32(float64 a, float_status *s)
2856 {
2857     FloatParts64 p;
2858 
2859     float64_unpack_canonical(&p, a, s);
2860     parts_float_to_float(&p, s);
2861     return float32_round_pack_canonical(&p, s);
2862 }
2863 
2864 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2865 {
2866     FloatParts64 p;
2867 
2868     bfloat16_unpack_canonical(&p, a, s);
2869     parts_float_to_float(&p, s);
2870     return float32_round_pack_canonical(&p, s);
2871 }
2872 
2873 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2874 {
2875     FloatParts64 p;
2876 
2877     bfloat16_unpack_canonical(&p, a, s);
2878     parts_float_to_float(&p, s);
2879     return float64_round_pack_canonical(&p, s);
2880 }
2881 
2882 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2883 {
2884     FloatParts64 p;
2885 
2886     float32_unpack_canonical(&p, a, s);
2887     parts_float_to_float(&p, s);
2888     return bfloat16_round_pack_canonical(&p, s);
2889 }
2890 
2891 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2892 {
2893     FloatParts64 p;
2894 
2895     float64_unpack_canonical(&p, a, s);
2896     parts_float_to_float(&p, s);
2897     return bfloat16_round_pack_canonical(&p, s);
2898 }
2899 
2900 float32 float128_to_float32(float128 a, float_status *s)
2901 {
2902     FloatParts64 p64;
2903     FloatParts128 p128;
2904 
2905     float128_unpack_canonical(&p128, a, s);
2906     parts_float_to_float_narrow(&p64, &p128, s);
2907     return float32_round_pack_canonical(&p64, s);
2908 }
2909 
2910 float64 float128_to_float64(float128 a, float_status *s)
2911 {
2912     FloatParts64 p64;
2913     FloatParts128 p128;
2914 
2915     float128_unpack_canonical(&p128, a, s);
2916     parts_float_to_float_narrow(&p64, &p128, s);
2917     return float64_round_pack_canonical(&p64, s);
2918 }
2919 
2920 float128 float32_to_float128(float32 a, float_status *s)
2921 {
2922     FloatParts64 p64;
2923     FloatParts128 p128;
2924 
2925     float32_unpack_canonical(&p64, a, s);
2926     parts_float_to_float_widen(&p128, &p64, s);
2927     return float128_round_pack_canonical(&p128, s);
2928 }
2929 
2930 float128 float64_to_float128(float64 a, float_status *s)
2931 {
2932     FloatParts64 p64;
2933     FloatParts128 p128;
2934 
2935     float64_unpack_canonical(&p64, a, s);
2936     parts_float_to_float_widen(&p128, &p64, s);
2937     return float128_round_pack_canonical(&p128, s);
2938 }
2939 
2940 float32 floatx80_to_float32(floatx80 a, float_status *s)
2941 {
2942     FloatParts64 p64;
2943     FloatParts128 p128;
2944 
2945     if (floatx80_unpack_canonical(&p128, a, s)) {
2946         parts_float_to_float_narrow(&p64, &p128, s);
2947     } else {
2948         parts_default_nan(&p64, s);
2949     }
2950     return float32_round_pack_canonical(&p64, s);
2951 }
2952 
2953 float64 floatx80_to_float64(floatx80 a, float_status *s)
2954 {
2955     FloatParts64 p64;
2956     FloatParts128 p128;
2957 
2958     if (floatx80_unpack_canonical(&p128, a, s)) {
2959         parts_float_to_float_narrow(&p64, &p128, s);
2960     } else {
2961         parts_default_nan(&p64, s);
2962     }
2963     return float64_round_pack_canonical(&p64, s);
2964 }
2965 
2966 float128 floatx80_to_float128(floatx80 a, float_status *s)
2967 {
2968     FloatParts128 p;
2969 
2970     if (floatx80_unpack_canonical(&p, a, s)) {
2971         parts_float_to_float(&p, s);
2972     } else {
2973         parts_default_nan(&p, s);
2974     }
2975     return float128_round_pack_canonical(&p, s);
2976 }
2977 
2978 floatx80 float32_to_floatx80(float32 a, float_status *s)
2979 {
2980     FloatParts64 p64;
2981     FloatParts128 p128;
2982 
2983     float32_unpack_canonical(&p64, a, s);
2984     parts_float_to_float_widen(&p128, &p64, s);
2985     return floatx80_round_pack_canonical(&p128, s);
2986 }
2987 
2988 floatx80 float64_to_floatx80(float64 a, float_status *s)
2989 {
2990     FloatParts64 p64;
2991     FloatParts128 p128;
2992 
2993     float64_unpack_canonical(&p64, a, s);
2994     parts_float_to_float_widen(&p128, &p64, s);
2995     return floatx80_round_pack_canonical(&p128, s);
2996 }
2997 
2998 floatx80 float128_to_floatx80(float128 a, float_status *s)
2999 {
3000     FloatParts128 p;
3001 
3002     float128_unpack_canonical(&p, a, s);
3003     parts_float_to_float(&p, s);
3004     return floatx80_round_pack_canonical(&p, s);
3005 }
3006 
3007 /*
3008  * Round to integral value
3009  */
3010 
3011 float16 float16_round_to_int(float16 a, float_status *s)
3012 {
3013     FloatParts64 p;
3014 
3015     float16_unpack_canonical(&p, a, s);
3016     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
3017     return float16_round_pack_canonical(&p, s);
3018 }
3019 
3020 float32 float32_round_to_int(float32 a, float_status *s)
3021 {
3022     FloatParts64 p;
3023 
3024     float32_unpack_canonical(&p, a, s);
3025     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
3026     return float32_round_pack_canonical(&p, s);
3027 }
3028 
3029 float64 float64_round_to_int(float64 a, float_status *s)
3030 {
3031     FloatParts64 p;
3032 
3033     float64_unpack_canonical(&p, a, s);
3034     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
3035     return float64_round_pack_canonical(&p, s);
3036 }
3037 
3038 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
3039 {
3040     FloatParts64 p;
3041 
3042     bfloat16_unpack_canonical(&p, a, s);
3043     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
3044     return bfloat16_round_pack_canonical(&p, s);
3045 }
3046 
3047 float128 float128_round_to_int(float128 a, float_status *s)
3048 {
3049     FloatParts128 p;
3050 
3051     float128_unpack_canonical(&p, a, s);
3052     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3053     return float128_round_pack_canonical(&p, s);
3054 }
3055 
3056 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3057 {
3058     FloatParts128 p;
3059 
3060     if (!floatx80_unpack_canonical(&p, a, status)) {
3061         return floatx80_default_nan(status);
3062     }
3063 
3064     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3065                        &floatx80_params[status->floatx80_rounding_precision]);
3066     return floatx80_round_pack_canonical(&p, status);
3067 }
3068 
3069 /*
3070  * Floating-point to signed integer conversions
3071  */
3072 
3073 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3074                               float_status *s)
3075 {
3076     FloatParts64 p;
3077 
3078     float16_unpack_canonical(&p, a, s);
3079     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3080 }
3081 
3082 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3083                                 float_status *s)
3084 {
3085     FloatParts64 p;
3086 
3087     float16_unpack_canonical(&p, a, s);
3088     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3089 }
3090 
3091 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3092                                 float_status *s)
3093 {
3094     FloatParts64 p;
3095 
3096     float16_unpack_canonical(&p, a, s);
3097     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3098 }
3099 
3100 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3101                                 float_status *s)
3102 {
3103     FloatParts64 p;
3104 
3105     float16_unpack_canonical(&p, a, s);
3106     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3107 }
3108 
3109 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3110                                 float_status *s)
3111 {
3112     FloatParts64 p;
3113 
3114     float32_unpack_canonical(&p, a, s);
3115     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3116 }
3117 
3118 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3119                                 float_status *s)
3120 {
3121     FloatParts64 p;
3122 
3123     float32_unpack_canonical(&p, a, s);
3124     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3125 }
3126 
3127 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3128                                 float_status *s)
3129 {
3130     FloatParts64 p;
3131 
3132     float32_unpack_canonical(&p, a, s);
3133     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3134 }
3135 
3136 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3137                                 float_status *s)
3138 {
3139     FloatParts64 p;
3140 
3141     float64_unpack_canonical(&p, a, s);
3142     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3143 }
3144 
3145 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3146                                 float_status *s)
3147 {
3148     FloatParts64 p;
3149 
3150     float64_unpack_canonical(&p, a, s);
3151     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3152 }
3153 
3154 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3155                                 float_status *s)
3156 {
3157     FloatParts64 p;
3158 
3159     float64_unpack_canonical(&p, a, s);
3160     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3161 }
3162 
3163 int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3164                                float_status *s)
3165 {
3166     FloatParts64 p;
3167 
3168     bfloat16_unpack_canonical(&p, a, s);
3169     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3170 }
3171 
3172 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3173                                  float_status *s)
3174 {
3175     FloatParts64 p;
3176 
3177     bfloat16_unpack_canonical(&p, a, s);
3178     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3179 }
3180 
3181 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3182                                  float_status *s)
3183 {
3184     FloatParts64 p;
3185 
3186     bfloat16_unpack_canonical(&p, a, s);
3187     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3188 }
3189 
3190 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3191                                  float_status *s)
3192 {
3193     FloatParts64 p;
3194 
3195     bfloat16_unpack_canonical(&p, a, s);
3196     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3197 }
3198 
3199 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3200                                         int scale, float_status *s)
3201 {
3202     FloatParts128 p;
3203 
3204     float128_unpack_canonical(&p, a, s);
3205     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3206 }
3207 
3208 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3209                                         int scale, float_status *s)
3210 {
3211     FloatParts128 p;
3212 
3213     float128_unpack_canonical(&p, a, s);
3214     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3215 }
3216 
3217 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3218                                         int scale, float_status *s)
3219 {
3220     int flags = 0;
3221     Int128 r;
3222     FloatParts128 p;
3223 
3224     float128_unpack_canonical(&p, a, s);
3225 
3226     switch (p.cls) {
3227     case float_class_snan:
3228         flags |= float_flag_invalid_snan;
3229         /* fall through */
3230     case float_class_qnan:
3231         flags |= float_flag_invalid;
3232         r = UINT128_MAX;
3233         break;
3234 
3235     case float_class_inf:
3236         flags = float_flag_invalid | float_flag_invalid_cvti;
3237         r = p.sign ? INT128_MIN : INT128_MAX;
3238         break;
3239 
3240     case float_class_zero:
3241         return int128_zero();
3242 
3243     case float_class_normal:
3244     case float_class_denormal:
3245         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3246             flags = float_flag_inexact;
3247         }
3248 
3249         if (p.exp < 127) {
3250             int shift = 127 - p.exp;
3251             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3252             if (p.sign) {
3253                 r = int128_neg(r);
3254             }
3255         } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3256                    p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3257             r = INT128_MIN;
3258         } else {
3259             flags = float_flag_invalid | float_flag_invalid_cvti;
3260             r = p.sign ? INT128_MIN : INT128_MAX;
3261         }
3262         break;
3263 
3264     default:
3265         g_assert_not_reached();
3266     }
3267 
3268     float_raise(flags, s);
3269     return r;
3270 }
3271 
3272 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3273                                         int scale, float_status *s)
3274 {
3275     FloatParts128 p;
3276 
3277     if (!floatx80_unpack_canonical(&p, a, s)) {
3278         parts_default_nan(&p, s);
3279     }
3280     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3281 }
3282 
3283 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3284                                         int scale, float_status *s)
3285 {
3286     FloatParts128 p;
3287 
3288     if (!floatx80_unpack_canonical(&p, a, s)) {
3289         parts_default_nan(&p, s);
3290     }
3291     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3292 }
3293 
3294 int8_t float16_to_int8(float16 a, float_status *s)
3295 {
3296     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3297 }
3298 
3299 int16_t float16_to_int16(float16 a, float_status *s)
3300 {
3301     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3302 }
3303 
3304 int32_t float16_to_int32(float16 a, float_status *s)
3305 {
3306     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3307 }
3308 
3309 int64_t float16_to_int64(float16 a, float_status *s)
3310 {
3311     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3312 }
3313 
3314 int16_t float32_to_int16(float32 a, float_status *s)
3315 {
3316     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3317 }
3318 
3319 int32_t float32_to_int32(float32 a, float_status *s)
3320 {
3321     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3322 }
3323 
3324 int64_t float32_to_int64(float32 a, float_status *s)
3325 {
3326     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3327 }
3328 
3329 int16_t float64_to_int16(float64 a, float_status *s)
3330 {
3331     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3332 }
3333 
3334 int32_t float64_to_int32(float64 a, float_status *s)
3335 {
3336     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3337 }
3338 
3339 int64_t float64_to_int64(float64 a, float_status *s)
3340 {
3341     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3342 }
3343 
3344 int32_t float128_to_int32(float128 a, float_status *s)
3345 {
3346     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3347 }
3348 
3349 int64_t float128_to_int64(float128 a, float_status *s)
3350 {
3351     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3352 }
3353 
3354 Int128 float128_to_int128(float128 a, float_status *s)
3355 {
3356     return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3357 }
3358 
3359 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3360 {
3361     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3362 }
3363 
3364 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3365 {
3366     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3367 }
3368 
3369 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3370 {
3371     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3372 }
3373 
3374 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3375 {
3376     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3377 }
3378 
3379 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3380 {
3381     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3382 }
3383 
3384 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3385 {
3386     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3387 }
3388 
3389 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3390 {
3391     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3392 }
3393 
3394 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3395 {
3396     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3397 }
3398 
3399 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3400 {
3401     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3402 }
3403 
3404 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3405 {
3406     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3407 }
3408 
3409 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3410 {
3411     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3412 }
3413 
3414 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3415 {
3416     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3417 }
3418 
3419 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3420 {
3421     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3422 }
3423 
3424 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3425 {
3426     return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3427 }
3428 
3429 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3430 {
3431     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3432 }
3433 
3434 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3435 {
3436     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3437 }
3438 
3439 int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
3440 {
3441     return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3442 }
3443 
3444 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3445 {
3446     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3447 }
3448 
3449 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3450 {
3451     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3452 }
3453 
3454 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3455 {
3456     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3457 }
3458 
3459 int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
3460 {
3461     return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
3462 }
3463 
3464 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3465 {
3466     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3467 }
3468 
3469 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3470 {
3471     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3472 }
3473 
3474 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3475 {
3476     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3477 }
3478 
3479 int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
3480                                 float_status *s)
3481 {
3482     FloatParts64 p;
3483 
3484     float64_unpack_canonical(&p, a, s);
3485     return parts_float_to_sint_modulo(&p, rmode, 31, s);
3486 }
3487 
3488 int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
3489                                 float_status *s)
3490 {
3491     FloatParts64 p;
3492 
3493     float64_unpack_canonical(&p, a, s);
3494     return parts_float_to_sint_modulo(&p, rmode, 63, s);
3495 }
3496 
3497 /*
3498  * Floating-point to unsigned integer conversions
3499  */
3500 
3501 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3502                                 float_status *s)
3503 {
3504     FloatParts64 p;
3505 
3506     float16_unpack_canonical(&p, a, s);
3507     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3508 }
3509 
3510 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3511                                   float_status *s)
3512 {
3513     FloatParts64 p;
3514 
3515     float16_unpack_canonical(&p, a, s);
3516     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3517 }
3518 
3519 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3520                                   float_status *s)
3521 {
3522     FloatParts64 p;
3523 
3524     float16_unpack_canonical(&p, a, s);
3525     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3526 }
3527 
3528 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3529                                   float_status *s)
3530 {
3531     FloatParts64 p;
3532 
3533     float16_unpack_canonical(&p, a, s);
3534     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3535 }
3536 
3537 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3538                                   float_status *s)
3539 {
3540     FloatParts64 p;
3541 
3542     float32_unpack_canonical(&p, a, s);
3543     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3544 }
3545 
3546 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3547                                   float_status *s)
3548 {
3549     FloatParts64 p;
3550 
3551     float32_unpack_canonical(&p, a, s);
3552     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3553 }
3554 
3555 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3556                                   float_status *s)
3557 {
3558     FloatParts64 p;
3559 
3560     float32_unpack_canonical(&p, a, s);
3561     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3562 }
3563 
3564 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3565                                   float_status *s)
3566 {
3567     FloatParts64 p;
3568 
3569     float64_unpack_canonical(&p, a, s);
3570     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3571 }
3572 
3573 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3574                                   float_status *s)
3575 {
3576     FloatParts64 p;
3577 
3578     float64_unpack_canonical(&p, a, s);
3579     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3580 }
3581 
3582 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3583                                   float_status *s)
3584 {
3585     FloatParts64 p;
3586 
3587     float64_unpack_canonical(&p, a, s);
3588     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3589 }
3590 
3591 uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
3592                                  int scale, float_status *s)
3593 {
3594     FloatParts64 p;
3595 
3596     bfloat16_unpack_canonical(&p, a, s);
3597     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3598 }
3599 
3600 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3601                                    int scale, float_status *s)
3602 {
3603     FloatParts64 p;
3604 
3605     bfloat16_unpack_canonical(&p, a, s);
3606     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3607 }
3608 
3609 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3610                                    int scale, float_status *s)
3611 {
3612     FloatParts64 p;
3613 
3614     bfloat16_unpack_canonical(&p, a, s);
3615     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3616 }
3617 
3618 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3619                                    int scale, float_status *s)
3620 {
3621     FloatParts64 p;
3622 
3623     bfloat16_unpack_canonical(&p, a, s);
3624     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3625 }
3626 
3627 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3628                                           int scale, float_status *s)
3629 {
3630     FloatParts128 p;
3631 
3632     float128_unpack_canonical(&p, a, s);
3633     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3634 }
3635 
3636 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3637                                           int scale, float_status *s)
3638 {
3639     FloatParts128 p;
3640 
3641     float128_unpack_canonical(&p, a, s);
3642     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3643 }
3644 
3645 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3646                                          int scale, float_status *s)
3647 {
3648     int flags = 0;
3649     Int128 r;
3650     FloatParts128 p;
3651 
3652     float128_unpack_canonical(&p, a, s);
3653 
3654     switch (p.cls) {
3655     case float_class_snan:
3656         flags |= float_flag_invalid_snan;
3657         /* fall through */
3658     case float_class_qnan:
3659         flags |= float_flag_invalid;
3660         r = UINT128_MAX;
3661         break;
3662 
3663     case float_class_inf:
3664         flags = float_flag_invalid | float_flag_invalid_cvti;
3665         r = p.sign ? int128_zero() : UINT128_MAX;
3666         break;
3667 
3668     case float_class_zero:
3669         return int128_zero();
3670 
3671     case float_class_normal:
3672     case float_class_denormal:
3673         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3674             flags = float_flag_inexact;
3675             if (p.cls == float_class_zero) {
3676                 r = int128_zero();
3677                 break;
3678             }
3679         }
3680 
3681         if (p.sign) {
3682             flags = float_flag_invalid | float_flag_invalid_cvti;
3683             r = int128_zero();
3684         } else if (p.exp <= 127) {
3685             int shift = 127 - p.exp;
3686             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3687         } else {
3688             flags = float_flag_invalid | float_flag_invalid_cvti;
3689             r = UINT128_MAX;
3690         }
3691         break;
3692 
3693     default:
3694         g_assert_not_reached();
3695     }
3696 
3697     float_raise(flags, s);
3698     return r;
3699 }
3700 
3701 uint8_t float16_to_uint8(float16 a, float_status *s)
3702 {
3703     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3704 }
3705 
3706 uint16_t float16_to_uint16(float16 a, float_status *s)
3707 {
3708     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3709 }
3710 
3711 uint32_t float16_to_uint32(float16 a, float_status *s)
3712 {
3713     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3714 }
3715 
3716 uint64_t float16_to_uint64(float16 a, float_status *s)
3717 {
3718     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3719 }
3720 
3721 uint16_t float32_to_uint16(float32 a, float_status *s)
3722 {
3723     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3724 }
3725 
3726 uint32_t float32_to_uint32(float32 a, float_status *s)
3727 {
3728     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3729 }
3730 
3731 uint64_t float32_to_uint64(float32 a, float_status *s)
3732 {
3733     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3734 }
3735 
3736 uint16_t float64_to_uint16(float64 a, float_status *s)
3737 {
3738     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3739 }
3740 
3741 uint32_t float64_to_uint32(float64 a, float_status *s)
3742 {
3743     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3744 }
3745 
3746 uint64_t float64_to_uint64(float64 a, float_status *s)
3747 {
3748     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3749 }
3750 
3751 uint32_t float128_to_uint32(float128 a, float_status *s)
3752 {
3753     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3754 }
3755 
3756 uint64_t float128_to_uint64(float128 a, float_status *s)
3757 {
3758     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3759 }
3760 
3761 Int128 float128_to_uint128(float128 a, float_status *s)
3762 {
3763     return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3764 }
3765 
3766 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3767 {
3768     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3769 }
3770 
3771 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3772 {
3773     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3774 }
3775 
3776 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3777 {
3778     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3779 }
3780 
3781 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3782 {
3783     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3784 }
3785 
3786 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3787 {
3788     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3789 }
3790 
3791 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3792 {
3793     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3794 }
3795 
3796 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3797 {
3798     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3799 }
3800 
3801 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3802 {
3803     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3804 }
3805 
3806 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3807 {
3808     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3809 }
3810 
3811 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3812 {
3813     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3814 }
3815 
3816 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3817 {
3818     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3819 }
3820 
3821 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3822 {
3823     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3824 }
3825 
3826 uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
3827 {
3828     return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3829 }
3830 
3831 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3832 {
3833     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3834 }
3835 
3836 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3837 {
3838     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3839 }
3840 
3841 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3842 {
3843     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3844 }
3845 
3846 uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
3847 {
3848     return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
3849 }
3850 
3851 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3852 {
3853     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3854 }
3855 
3856 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3857 {
3858     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3859 }
3860 
3861 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3862 {
3863     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3864 }
3865 
3866 /*
3867  * Signed integer to floating-point conversions
3868  */
3869 
3870 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3871 {
3872     FloatParts64 p;
3873 
3874     parts_sint_to_float(&p, a, scale, status);
3875     return float16_round_pack_canonical(&p, status);
3876 }
3877 
3878 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3879 {
3880     return int64_to_float16_scalbn(a, scale, status);
3881 }
3882 
3883 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3884 {
3885     return int64_to_float16_scalbn(a, scale, status);
3886 }
3887 
3888 float16 int64_to_float16(int64_t a, float_status *status)
3889 {
3890     return int64_to_float16_scalbn(a, 0, status);
3891 }
3892 
3893 float16 int32_to_float16(int32_t a, float_status *status)
3894 {
3895     return int64_to_float16_scalbn(a, 0, status);
3896 }
3897 
3898 float16 int16_to_float16(int16_t a, float_status *status)
3899 {
3900     return int64_to_float16_scalbn(a, 0, status);
3901 }
3902 
3903 float16 int8_to_float16(int8_t a, float_status *status)
3904 {
3905     return int64_to_float16_scalbn(a, 0, status);
3906 }
3907 
3908 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3909 {
3910     FloatParts64 p;
3911 
3912     /* Without scaling, there are no overflow concerns. */
3913     if (likely(scale == 0) && can_use_fpu(status)) {
3914         union_float32 ur;
3915         ur.h = a;
3916         return ur.s;
3917     }
3918 
3919     parts64_sint_to_float(&p, a, scale, status);
3920     return float32_round_pack_canonical(&p, status);
3921 }
3922 
3923 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3924 {
3925     return int64_to_float32_scalbn(a, scale, status);
3926 }
3927 
3928 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3929 {
3930     return int64_to_float32_scalbn(a, scale, status);
3931 }
3932 
3933 float32 int64_to_float32(int64_t a, float_status *status)
3934 {
3935     return int64_to_float32_scalbn(a, 0, status);
3936 }
3937 
3938 float32 int32_to_float32(int32_t a, float_status *status)
3939 {
3940     return int64_to_float32_scalbn(a, 0, status);
3941 }
3942 
3943 float32 int16_to_float32(int16_t a, float_status *status)
3944 {
3945     return int64_to_float32_scalbn(a, 0, status);
3946 }
3947 
3948 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3949 {
3950     FloatParts64 p;
3951 
3952     /* Without scaling, there are no overflow concerns. */
3953     if (likely(scale == 0) && can_use_fpu(status)) {
3954         union_float64 ur;
3955         ur.h = a;
3956         return ur.s;
3957     }
3958 
3959     parts_sint_to_float(&p, a, scale, status);
3960     return float64_round_pack_canonical(&p, status);
3961 }
3962 
3963 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3964 {
3965     return int64_to_float64_scalbn(a, scale, status);
3966 }
3967 
3968 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3969 {
3970     return int64_to_float64_scalbn(a, scale, status);
3971 }
3972 
3973 float64 int64_to_float64(int64_t a, float_status *status)
3974 {
3975     return int64_to_float64_scalbn(a, 0, status);
3976 }
3977 
3978 float64 int32_to_float64(int32_t a, float_status *status)
3979 {
3980     return int64_to_float64_scalbn(a, 0, status);
3981 }
3982 
3983 float64 int16_to_float64(int16_t a, float_status *status)
3984 {
3985     return int64_to_float64_scalbn(a, 0, status);
3986 }
3987 
3988 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3989 {
3990     FloatParts64 p;
3991 
3992     parts_sint_to_float(&p, a, scale, status);
3993     return bfloat16_round_pack_canonical(&p, status);
3994 }
3995 
3996 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3997 {
3998     return int64_to_bfloat16_scalbn(a, scale, status);
3999 }
4000 
4001 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
4002 {
4003     return int64_to_bfloat16_scalbn(a, scale, status);
4004 }
4005 
4006 bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
4007 {
4008     return int64_to_bfloat16_scalbn(a, scale, status);
4009 }
4010 
4011 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
4012 {
4013     return int64_to_bfloat16_scalbn(a, 0, status);
4014 }
4015 
4016 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
4017 {
4018     return int64_to_bfloat16_scalbn(a, 0, status);
4019 }
4020 
4021 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
4022 {
4023     return int64_to_bfloat16_scalbn(a, 0, status);
4024 }
4025 
4026 bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
4027 {
4028     return int64_to_bfloat16_scalbn(a, 0, status);
4029 }
4030 
4031 float128 int128_to_float128(Int128 a, float_status *status)
4032 {
4033     FloatParts128 p = { };
4034     int shift;
4035 
4036     if (int128_nz(a)) {
4037         p.cls = float_class_normal;
4038         if (!int128_nonneg(a)) {
4039             p.sign = true;
4040             a = int128_neg(a);
4041         }
4042 
4043         shift = clz64(int128_gethi(a));
4044         if (shift == 64) {
4045             shift += clz64(int128_getlo(a));
4046         }
4047 
4048         p.exp = 127 - shift;
4049         a = int128_lshift(a, shift);
4050 
4051         p.frac_hi = int128_gethi(a);
4052         p.frac_lo = int128_getlo(a);
4053     } else {
4054         p.cls = float_class_zero;
4055     }
4056 
4057     return float128_round_pack_canonical(&p, status);
4058 }
4059 
4060 float128 int64_to_float128(int64_t a, float_status *status)
4061 {
4062     FloatParts128 p;
4063 
4064     parts_sint_to_float(&p, a, 0, status);
4065     return float128_round_pack_canonical(&p, status);
4066 }
4067 
4068 float128 int32_to_float128(int32_t a, float_status *status)
4069 {
4070     return int64_to_float128(a, status);
4071 }
4072 
4073 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4074 {
4075     FloatParts128 p;
4076 
4077     parts_sint_to_float(&p, a, 0, status);
4078     return floatx80_round_pack_canonical(&p, status);
4079 }
4080 
4081 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4082 {
4083     return int64_to_floatx80(a, status);
4084 }
4085 
4086 /*
4087  * Unsigned Integer to floating-point conversions
4088  */
4089 
4090 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
4091 {
4092     FloatParts64 p;
4093 
4094     parts_uint_to_float(&p, a, scale, status);
4095     return float16_round_pack_canonical(&p, status);
4096 }
4097 
4098 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
4099 {
4100     return uint64_to_float16_scalbn(a, scale, status);
4101 }
4102 
4103 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
4104 {
4105     return uint64_to_float16_scalbn(a, scale, status);
4106 }
4107 
4108 float16 uint64_to_float16(uint64_t a, float_status *status)
4109 {
4110     return uint64_to_float16_scalbn(a, 0, status);
4111 }
4112 
4113 float16 uint32_to_float16(uint32_t a, float_status *status)
4114 {
4115     return uint64_to_float16_scalbn(a, 0, status);
4116 }
4117 
4118 float16 uint16_to_float16(uint16_t a, float_status *status)
4119 {
4120     return uint64_to_float16_scalbn(a, 0, status);
4121 }
4122 
4123 float16 uint8_to_float16(uint8_t a, float_status *status)
4124 {
4125     return uint64_to_float16_scalbn(a, 0, status);
4126 }
4127 
4128 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4129 {
4130     FloatParts64 p;
4131 
4132     /* Without scaling, there are no overflow concerns. */
4133     if (likely(scale == 0) && can_use_fpu(status)) {
4134         union_float32 ur;
4135         ur.h = a;
4136         return ur.s;
4137     }
4138 
4139     parts_uint_to_float(&p, a, scale, status);
4140     return float32_round_pack_canonical(&p, status);
4141 }
4142 
4143 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4144 {
4145     return uint64_to_float32_scalbn(a, scale, status);
4146 }
4147 
4148 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4149 {
4150     return uint64_to_float32_scalbn(a, scale, status);
4151 }
4152 
4153 float32 uint64_to_float32(uint64_t a, float_status *status)
4154 {
4155     return uint64_to_float32_scalbn(a, 0, status);
4156 }
4157 
4158 float32 uint32_to_float32(uint32_t a, float_status *status)
4159 {
4160     return uint64_to_float32_scalbn(a, 0, status);
4161 }
4162 
4163 float32 uint16_to_float32(uint16_t a, float_status *status)
4164 {
4165     return uint64_to_float32_scalbn(a, 0, status);
4166 }
4167 
4168 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4169 {
4170     FloatParts64 p;
4171 
4172     /* Without scaling, there are no overflow concerns. */
4173     if (likely(scale == 0) && can_use_fpu(status)) {
4174         union_float64 ur;
4175         ur.h = a;
4176         return ur.s;
4177     }
4178 
4179     parts_uint_to_float(&p, a, scale, status);
4180     return float64_round_pack_canonical(&p, status);
4181 }
4182 
4183 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4184 {
4185     return uint64_to_float64_scalbn(a, scale, status);
4186 }
4187 
4188 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4189 {
4190     return uint64_to_float64_scalbn(a, scale, status);
4191 }
4192 
4193 float64 uint64_to_float64(uint64_t a, float_status *status)
4194 {
4195     return uint64_to_float64_scalbn(a, 0, status);
4196 }
4197 
4198 float64 uint32_to_float64(uint32_t a, float_status *status)
4199 {
4200     return uint64_to_float64_scalbn(a, 0, status);
4201 }
4202 
4203 float64 uint16_to_float64(uint16_t a, float_status *status)
4204 {
4205     return uint64_to_float64_scalbn(a, 0, status);
4206 }
4207 
4208 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4209 {
4210     FloatParts64 p;
4211 
4212     parts_uint_to_float(&p, a, scale, status);
4213     return bfloat16_round_pack_canonical(&p, status);
4214 }
4215 
4216 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4217 {
4218     return uint64_to_bfloat16_scalbn(a, scale, status);
4219 }
4220 
4221 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4222 {
4223     return uint64_to_bfloat16_scalbn(a, scale, status);
4224 }
4225 
4226 bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
4227 {
4228     return uint64_to_bfloat16_scalbn(a, scale, status);
4229 }
4230 
4231 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4232 {
4233     return uint64_to_bfloat16_scalbn(a, 0, status);
4234 }
4235 
4236 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4237 {
4238     return uint64_to_bfloat16_scalbn(a, 0, status);
4239 }
4240 
4241 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4242 {
4243     return uint64_to_bfloat16_scalbn(a, 0, status);
4244 }
4245 
4246 bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
4247 {
4248     return uint64_to_bfloat16_scalbn(a, 0, status);
4249 }
4250 
4251 float128 uint64_to_float128(uint64_t a, float_status *status)
4252 {
4253     FloatParts128 p;
4254 
4255     parts_uint_to_float(&p, a, 0, status);
4256     return float128_round_pack_canonical(&p, status);
4257 }
4258 
4259 float128 uint128_to_float128(Int128 a, float_status *status)
4260 {
4261     FloatParts128 p = { };
4262     int shift;
4263 
4264     if (int128_nz(a)) {
4265         p.cls = float_class_normal;
4266 
4267         shift = clz64(int128_gethi(a));
4268         if (shift == 64) {
4269             shift += clz64(int128_getlo(a));
4270         }
4271 
4272         p.exp = 127 - shift;
4273         a = int128_lshift(a, shift);
4274 
4275         p.frac_hi = int128_gethi(a);
4276         p.frac_lo = int128_getlo(a);
4277     } else {
4278         p.cls = float_class_zero;
4279     }
4280 
4281     return float128_round_pack_canonical(&p, status);
4282 }
4283 
4284 /*
4285  * Minimum and maximum
4286  */
4287 
4288 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4289 {
4290     FloatParts64 pa, pb, *pr;
4291 
4292     float16_unpack_canonical(&pa, a, s);
4293     float16_unpack_canonical(&pb, b, s);
4294     pr = parts_minmax(&pa, &pb, s, flags);
4295 
4296     return float16_round_pack_canonical(pr, s);
4297 }
4298 
4299 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4300                                 float_status *s, int flags)
4301 {
4302     FloatParts64 pa, pb, *pr;
4303 
4304     bfloat16_unpack_canonical(&pa, a, s);
4305     bfloat16_unpack_canonical(&pb, b, s);
4306     pr = parts_minmax(&pa, &pb, s, flags);
4307 
4308     return bfloat16_round_pack_canonical(pr, s);
4309 }
4310 
4311 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4312 {
4313     FloatParts64 pa, pb, *pr;
4314 
4315     float32_unpack_canonical(&pa, a, s);
4316     float32_unpack_canonical(&pb, b, s);
4317     pr = parts_minmax(&pa, &pb, s, flags);
4318 
4319     return float32_round_pack_canonical(pr, s);
4320 }
4321 
4322 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4323 {
4324     FloatParts64 pa, pb, *pr;
4325 
4326     float64_unpack_canonical(&pa, a, s);
4327     float64_unpack_canonical(&pb, b, s);
4328     pr = parts_minmax(&pa, &pb, s, flags);
4329 
4330     return float64_round_pack_canonical(pr, s);
4331 }
4332 
4333 static float128 float128_minmax(float128 a, float128 b,
4334                                 float_status *s, int flags)
4335 {
4336     FloatParts128 pa, pb, *pr;
4337 
4338     float128_unpack_canonical(&pa, a, s);
4339     float128_unpack_canonical(&pb, b, s);
4340     pr = parts_minmax(&pa, &pb, s, flags);
4341 
4342     return float128_round_pack_canonical(pr, s);
4343 }
4344 
4345 #define MINMAX_1(type, name, flags) \
4346     type type##_##name(type a, type b, float_status *s) \
4347     { return type##_minmax(a, b, s, flags); }
4348 
4349 #define MINMAX_2(type) \
4350     MINMAX_1(type, max, 0)                                                \
4351     MINMAX_1(type, maxnum, minmax_isnum)                                  \
4352     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4353     MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4354     MINMAX_1(type, min, minmax_ismin)                                     \
4355     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4356     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4357     MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4358 
4359 MINMAX_2(float16)
4360 MINMAX_2(bfloat16)
4361 MINMAX_2(float32)
4362 MINMAX_2(float64)
4363 MINMAX_2(float128)
4364 
4365 #undef MINMAX_1
4366 #undef MINMAX_2
4367 
4368 /*
4369  * Floating point compare
4370  */
4371 
4372 static FloatRelation QEMU_FLATTEN
4373 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4374 {
4375     FloatParts64 pa, pb;
4376 
4377     float16_unpack_canonical(&pa, a, s);
4378     float16_unpack_canonical(&pb, b, s);
4379     return parts_compare(&pa, &pb, s, is_quiet);
4380 }
4381 
4382 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4383 {
4384     return float16_do_compare(a, b, s, false);
4385 }
4386 
4387 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4388 {
4389     return float16_do_compare(a, b, s, true);
4390 }
4391 
4392 static FloatRelation QEMU_SOFTFLOAT_ATTR
4393 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4394 {
4395     FloatParts64 pa, pb;
4396 
4397     float32_unpack_canonical(&pa, a, s);
4398     float32_unpack_canonical(&pb, b, s);
4399     return parts_compare(&pa, &pb, s, is_quiet);
4400 }
4401 
4402 static FloatRelation QEMU_FLATTEN
4403 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4404 {
4405     union_float32 ua, ub;
4406 
4407     ua.s = xa;
4408     ub.s = xb;
4409 
4410     if (QEMU_NO_HARDFLOAT) {
4411         goto soft;
4412     }
4413 
4414     float32_input_flush2(&ua.s, &ub.s, s);
4415     if (isgreaterequal(ua.h, ub.h)) {
4416         if (isgreater(ua.h, ub.h)) {
4417             return float_relation_greater;
4418         }
4419         return float_relation_equal;
4420     }
4421     if (likely(isless(ua.h, ub.h))) {
4422         return float_relation_less;
4423     }
4424     /*
4425      * The only condition remaining is unordered.
4426      * Fall through to set flags.
4427      */
4428  soft:
4429     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4430 }
4431 
4432 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4433 {
4434     return float32_hs_compare(a, b, s, false);
4435 }
4436 
4437 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4438 {
4439     return float32_hs_compare(a, b, s, true);
4440 }
4441 
4442 static FloatRelation QEMU_SOFTFLOAT_ATTR
4443 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4444 {
4445     FloatParts64 pa, pb;
4446 
4447     float64_unpack_canonical(&pa, a, s);
4448     float64_unpack_canonical(&pb, b, s);
4449     return parts_compare(&pa, &pb, s, is_quiet);
4450 }
4451 
4452 static FloatRelation QEMU_FLATTEN
4453 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4454 {
4455     union_float64 ua, ub;
4456 
4457     ua.s = xa;
4458     ub.s = xb;
4459 
4460     if (QEMU_NO_HARDFLOAT) {
4461         goto soft;
4462     }
4463 
4464     float64_input_flush2(&ua.s, &ub.s, s);
4465     if (isgreaterequal(ua.h, ub.h)) {
4466         if (isgreater(ua.h, ub.h)) {
4467             return float_relation_greater;
4468         }
4469         return float_relation_equal;
4470     }
4471     if (likely(isless(ua.h, ub.h))) {
4472         return float_relation_less;
4473     }
4474     /*
4475      * The only condition remaining is unordered.
4476      * Fall through to set flags.
4477      */
4478  soft:
4479     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4480 }
4481 
4482 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4483 {
4484     return float64_hs_compare(a, b, s, false);
4485 }
4486 
4487 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4488 {
4489     return float64_hs_compare(a, b, s, true);
4490 }
4491 
4492 static FloatRelation QEMU_FLATTEN
4493 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4494 {
4495     FloatParts64 pa, pb;
4496 
4497     bfloat16_unpack_canonical(&pa, a, s);
4498     bfloat16_unpack_canonical(&pb, b, s);
4499     return parts_compare(&pa, &pb, s, is_quiet);
4500 }
4501 
4502 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4503 {
4504     return bfloat16_do_compare(a, b, s, false);
4505 }
4506 
4507 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4508 {
4509     return bfloat16_do_compare(a, b, s, true);
4510 }
4511 
4512 static FloatRelation QEMU_FLATTEN
4513 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4514 {
4515     FloatParts128 pa, pb;
4516 
4517     float128_unpack_canonical(&pa, a, s);
4518     float128_unpack_canonical(&pb, b, s);
4519     return parts_compare(&pa, &pb, s, is_quiet);
4520 }
4521 
4522 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4523 {
4524     return float128_do_compare(a, b, s, false);
4525 }
4526 
4527 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4528 {
4529     return float128_do_compare(a, b, s, true);
4530 }
4531 
4532 static FloatRelation QEMU_FLATTEN
4533 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4534 {
4535     FloatParts128 pa, pb;
4536 
4537     if (!floatx80_unpack_canonical(&pa, a, s) ||
4538         !floatx80_unpack_canonical(&pb, b, s)) {
4539         return float_relation_unordered;
4540     }
4541     return parts_compare(&pa, &pb, s, is_quiet);
4542 }
4543 
4544 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4545 {
4546     return floatx80_do_compare(a, b, s, false);
4547 }
4548 
4549 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4550 {
4551     return floatx80_do_compare(a, b, s, true);
4552 }
4553 
4554 /*
4555  * Scale by 2**N
4556  */
4557 
4558 float16 float16_scalbn(float16 a, int n, float_status *status)
4559 {
4560     FloatParts64 p;
4561 
4562     float16_unpack_canonical(&p, a, status);
4563     parts_scalbn(&p, n, status);
4564     return float16_round_pack_canonical(&p, status);
4565 }
4566 
4567 float32 float32_scalbn(float32 a, int n, float_status *status)
4568 {
4569     FloatParts64 p;
4570 
4571     float32_unpack_canonical(&p, a, status);
4572     parts_scalbn(&p, n, status);
4573     return float32_round_pack_canonical(&p, status);
4574 }
4575 
4576 float64 float64_scalbn(float64 a, int n, float_status *status)
4577 {
4578     FloatParts64 p;
4579 
4580     float64_unpack_canonical(&p, a, status);
4581     parts_scalbn(&p, n, status);
4582     return float64_round_pack_canonical(&p, status);
4583 }
4584 
4585 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4586 {
4587     FloatParts64 p;
4588 
4589     bfloat16_unpack_canonical(&p, a, status);
4590     parts_scalbn(&p, n, status);
4591     return bfloat16_round_pack_canonical(&p, status);
4592 }
4593 
4594 float128 float128_scalbn(float128 a, int n, float_status *status)
4595 {
4596     FloatParts128 p;
4597 
4598     float128_unpack_canonical(&p, a, status);
4599     parts_scalbn(&p, n, status);
4600     return float128_round_pack_canonical(&p, status);
4601 }
4602 
4603 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4604 {
4605     FloatParts128 p;
4606 
4607     if (!floatx80_unpack_canonical(&p, a, status)) {
4608         return floatx80_default_nan(status);
4609     }
4610     parts_scalbn(&p, n, status);
4611     return floatx80_round_pack_canonical(&p, status);
4612 }
4613 
4614 /*
4615  * Square Root
4616  */
4617 
4618 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4619 {
4620     FloatParts64 p;
4621 
4622     float16_unpack_canonical(&p, a, status);
4623     parts_sqrt(&p, status, &float16_params);
4624     return float16_round_pack_canonical(&p, status);
4625 }
4626 
4627 static float32 QEMU_SOFTFLOAT_ATTR
4628 soft_f32_sqrt(float32 a, float_status *status)
4629 {
4630     FloatParts64 p;
4631 
4632     float32_unpack_canonical(&p, a, status);
4633     parts_sqrt(&p, status, &float32_params);
4634     return float32_round_pack_canonical(&p, status);
4635 }
4636 
4637 static float64 QEMU_SOFTFLOAT_ATTR
4638 soft_f64_sqrt(float64 a, float_status *status)
4639 {
4640     FloatParts64 p;
4641 
4642     float64_unpack_canonical(&p, a, status);
4643     parts_sqrt(&p, status, &float64_params);
4644     return float64_round_pack_canonical(&p, status);
4645 }
4646 
4647 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4648 {
4649     union_float32 ua, ur;
4650 
4651     ua.s = xa;
4652     if (unlikely(!can_use_fpu(s))) {
4653         goto soft;
4654     }
4655 
4656     float32_input_flush1(&ua.s, s);
4657     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4658         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4659                        fpclassify(ua.h) == FP_ZERO) ||
4660                      signbit(ua.h))) {
4661             goto soft;
4662         }
4663     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4664                         float32_is_neg(ua.s))) {
4665         goto soft;
4666     }
4667     ur.h = sqrtf(ua.h);
4668     return ur.s;
4669 
4670  soft:
4671     return soft_f32_sqrt(ua.s, s);
4672 }
4673 
4674 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4675 {
4676     union_float64 ua, ur;
4677 
4678     ua.s = xa;
4679     if (unlikely(!can_use_fpu(s))) {
4680         goto soft;
4681     }
4682 
4683     float64_input_flush1(&ua.s, s);
4684     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4685         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4686                        fpclassify(ua.h) == FP_ZERO) ||
4687                      signbit(ua.h))) {
4688             goto soft;
4689         }
4690     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4691                         float64_is_neg(ua.s))) {
4692         goto soft;
4693     }
4694     ur.h = sqrt(ua.h);
4695     return ur.s;
4696 
4697  soft:
4698     return soft_f64_sqrt(ua.s, s);
4699 }
4700 
4701 float64 float64r32_sqrt(float64 a, float_status *status)
4702 {
4703     FloatParts64 p;
4704 
4705     float64_unpack_canonical(&p, a, status);
4706     parts_sqrt(&p, status, &float64_params);
4707     return float64r32_round_pack_canonical(&p, status);
4708 }
4709 
4710 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4711 {
4712     FloatParts64 p;
4713 
4714     bfloat16_unpack_canonical(&p, a, status);
4715     parts_sqrt(&p, status, &bfloat16_params);
4716     return bfloat16_round_pack_canonical(&p, status);
4717 }
4718 
4719 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4720 {
4721     FloatParts128 p;
4722 
4723     float128_unpack_canonical(&p, a, status);
4724     parts_sqrt(&p, status, &float128_params);
4725     return float128_round_pack_canonical(&p, status);
4726 }
4727 
4728 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4729 {
4730     FloatParts128 p;
4731 
4732     if (!floatx80_unpack_canonical(&p, a, s)) {
4733         return floatx80_default_nan(s);
4734     }
4735     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4736     return floatx80_round_pack_canonical(&p, s);
4737 }
4738 
4739 /*
4740  * log2
4741  */
4742 float32 float32_log2(float32 a, float_status *status)
4743 {
4744     FloatParts64 p;
4745 
4746     float32_unpack_canonical(&p, a, status);
4747     parts_log2(&p, status, &float32_params);
4748     return float32_round_pack_canonical(&p, status);
4749 }
4750 
4751 float64 float64_log2(float64 a, float_status *status)
4752 {
4753     FloatParts64 p;
4754 
4755     float64_unpack_canonical(&p, a, status);
4756     parts_log2(&p, status, &float64_params);
4757     return float64_round_pack_canonical(&p, status);
4758 }
4759 
4760 /*----------------------------------------------------------------------------
4761 | The pattern for a default generated NaN.
4762 *----------------------------------------------------------------------------*/
4763 
4764 float16 float16_default_nan(float_status *status)
4765 {
4766     FloatParts64 p;
4767 
4768     parts_default_nan(&p, status);
4769     p.frac >>= float16_params.frac_shift;
4770     return float16_pack_raw(&p);
4771 }
4772 
4773 float32 float32_default_nan(float_status *status)
4774 {
4775     FloatParts64 p;
4776 
4777     parts_default_nan(&p, status);
4778     p.frac >>= float32_params.frac_shift;
4779     return float32_pack_raw(&p);
4780 }
4781 
4782 float64 float64_default_nan(float_status *status)
4783 {
4784     FloatParts64 p;
4785 
4786     parts_default_nan(&p, status);
4787     p.frac >>= float64_params.frac_shift;
4788     return float64_pack_raw(&p);
4789 }
4790 
4791 float128 float128_default_nan(float_status *status)
4792 {
4793     FloatParts128 p;
4794 
4795     parts_default_nan(&p, status);
4796     frac_shr(&p, float128_params.frac_shift);
4797     return float128_pack_raw(&p);
4798 }
4799 
4800 bfloat16 bfloat16_default_nan(float_status *status)
4801 {
4802     FloatParts64 p;
4803 
4804     parts_default_nan(&p, status);
4805     p.frac >>= bfloat16_params.frac_shift;
4806     return bfloat16_pack_raw(&p);
4807 }
4808 
4809 /*----------------------------------------------------------------------------
4810 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4811 *----------------------------------------------------------------------------*/
4812 
4813 float16 float16_silence_nan(float16 a, float_status *status)
4814 {
4815     FloatParts64 p;
4816 
4817     float16_unpack_raw(&p, a);
4818     p.frac <<= float16_params.frac_shift;
4819     parts_silence_nan(&p, status);
4820     p.frac >>= float16_params.frac_shift;
4821     return float16_pack_raw(&p);
4822 }
4823 
4824 float32 float32_silence_nan(float32 a, float_status *status)
4825 {
4826     FloatParts64 p;
4827 
4828     float32_unpack_raw(&p, a);
4829     p.frac <<= float32_params.frac_shift;
4830     parts_silence_nan(&p, status);
4831     p.frac >>= float32_params.frac_shift;
4832     return float32_pack_raw(&p);
4833 }
4834 
4835 float64 float64_silence_nan(float64 a, float_status *status)
4836 {
4837     FloatParts64 p;
4838 
4839     float64_unpack_raw(&p, a);
4840     p.frac <<= float64_params.frac_shift;
4841     parts_silence_nan(&p, status);
4842     p.frac >>= float64_params.frac_shift;
4843     return float64_pack_raw(&p);
4844 }
4845 
4846 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4847 {
4848     FloatParts64 p;
4849 
4850     bfloat16_unpack_raw(&p, a);
4851     p.frac <<= bfloat16_params.frac_shift;
4852     parts_silence_nan(&p, status);
4853     p.frac >>= bfloat16_params.frac_shift;
4854     return bfloat16_pack_raw(&p);
4855 }
4856 
4857 float128 float128_silence_nan(float128 a, float_status *status)
4858 {
4859     FloatParts128 p;
4860 
4861     float128_unpack_raw(&p, a);
4862     frac_shl(&p, float128_params.frac_shift);
4863     parts_silence_nan(&p, status);
4864     frac_shr(&p, float128_params.frac_shift);
4865     return float128_pack_raw(&p);
4866 }
4867 
4868 /*----------------------------------------------------------------------------
4869 | If `a' is denormal and we are in flush-to-zero mode then set the
4870 | input-denormal exception and return zero. Otherwise just return the value.
4871 *----------------------------------------------------------------------------*/
4872 
4873 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4874 {
4875     if (p.exp == 0 && p.frac != 0) {
4876         float_raise(float_flag_input_denormal_flushed, status);
4877         return true;
4878     }
4879 
4880     return false;
4881 }
4882 
4883 float16 float16_squash_input_denormal(float16 a, float_status *status)
4884 {
4885     if (status->flush_inputs_to_zero) {
4886         FloatParts64 p;
4887 
4888         float16_unpack_raw(&p, a);
4889         if (parts_squash_denormal(p, status)) {
4890             return float16_set_sign(float16_zero, p.sign);
4891         }
4892     }
4893     return a;
4894 }
4895 
4896 float32 float32_squash_input_denormal(float32 a, float_status *status)
4897 {
4898     if (status->flush_inputs_to_zero) {
4899         FloatParts64 p;
4900 
4901         float32_unpack_raw(&p, a);
4902         if (parts_squash_denormal(p, status)) {
4903             return float32_set_sign(float32_zero, p.sign);
4904         }
4905     }
4906     return a;
4907 }
4908 
4909 float64 float64_squash_input_denormal(float64 a, float_status *status)
4910 {
4911     if (status->flush_inputs_to_zero) {
4912         FloatParts64 p;
4913 
4914         float64_unpack_raw(&p, a);
4915         if (parts_squash_denormal(p, status)) {
4916             return float64_set_sign(float64_zero, p.sign);
4917         }
4918     }
4919     return a;
4920 }
4921 
4922 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4923 {
4924     if (status->flush_inputs_to_zero) {
4925         FloatParts64 p;
4926 
4927         bfloat16_unpack_raw(&p, a);
4928         if (parts_squash_denormal(p, status)) {
4929             return bfloat16_set_sign(bfloat16_zero, p.sign);
4930         }
4931     }
4932     return a;
4933 }
4934 
4935 /*----------------------------------------------------------------------------
4936 | Normalizes the subnormal extended double-precision floating-point value
4937 | represented by the denormalized significand `aSig'.  The normalized exponent
4938 | and significand are stored at the locations pointed to by `zExpPtr' and
4939 | `zSigPtr', respectively.
4940 *----------------------------------------------------------------------------*/
4941 
4942 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4943                                 uint64_t *zSigPtr)
4944 {
4945     int8_t shiftCount;
4946 
4947     shiftCount = clz64(aSig);
4948     *zSigPtr = aSig<<shiftCount;
4949     *zExpPtr = 1 - shiftCount;
4950 }
4951 
4952 /*----------------------------------------------------------------------------
4953 | Takes two extended double-precision floating-point values `a' and `b', one
4954 | of which is a NaN, and returns the appropriate NaN result.  If either `a' or
4955 | `b' is a signaling NaN, the invalid exception is raised.
4956 *----------------------------------------------------------------------------*/
4957 
4958 floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status)
4959 {
4960     FloatParts128 pa, pb, *pr;
4961 
4962     if (!floatx80_unpack_canonical(&pa, a, status) ||
4963         !floatx80_unpack_canonical(&pb, b, status)) {
4964         return floatx80_default_nan(status);
4965     }
4966 
4967     pr = parts_pick_nan(&pa, &pb, status);
4968     return floatx80_round_pack_canonical(pr, status);
4969 }
4970 
4971 /*----------------------------------------------------------------------------
4972 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4973 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4974 | and returns the proper extended double-precision floating-point value
4975 | corresponding to the abstract input.  Ordinarily, the abstract value is
4976 | rounded and packed into the extended double-precision format, with the
4977 | inexact exception raised if the abstract input cannot be represented
4978 | exactly.  However, if the abstract value is too large, the overflow and
4979 | inexact exceptions are raised and an infinity or maximal finite value is
4980 | returned.  If the abstract value is too small, the input value is rounded to
4981 | a subnormal number, and the underflow and inexact exceptions are raised if
4982 | the abstract input cannot be represented exactly as a subnormal extended
4983 | double-precision floating-point number.
4984 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4985 | the result is rounded to the same number of bits as single or double
4986 | precision, respectively.  Otherwise, the result is rounded to the full
4987 | precision of the extended double-precision format.
4988 |     The input significand must be normalized or smaller.  If the input
4989 | significand is not normalized, `zExp' must be 0; in that case, the result
4990 | returned is a subnormal number, and it must not require rounding.  The
4991 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4992 | Floating-Point Arithmetic.
4993 *----------------------------------------------------------------------------*/
4994 
4995 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4996                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4997                               float_status *status)
4998 {
4999     FloatRoundMode roundingMode;
5000     bool roundNearestEven, increment, isTiny;
5001     int64_t roundIncrement, roundMask, roundBits;
5002 
5003     roundingMode = status->float_rounding_mode;
5004     roundNearestEven = ( roundingMode == float_round_nearest_even );
5005     switch (roundingPrecision) {
5006     case floatx80_precision_x:
5007         goto precision80;
5008     case floatx80_precision_d:
5009         roundIncrement = UINT64_C(0x0000000000000400);
5010         roundMask = UINT64_C(0x00000000000007FF);
5011         break;
5012     case floatx80_precision_s:
5013         roundIncrement = UINT64_C(0x0000008000000000);
5014         roundMask = UINT64_C(0x000000FFFFFFFFFF);
5015         break;
5016     default:
5017         g_assert_not_reached();
5018     }
5019     zSig0 |= ( zSig1 != 0 );
5020     switch (roundingMode) {
5021     case float_round_nearest_even:
5022     case float_round_ties_away:
5023         break;
5024     case float_round_to_zero:
5025         roundIncrement = 0;
5026         break;
5027     case float_round_up:
5028         roundIncrement = zSign ? 0 : roundMask;
5029         break;
5030     case float_round_down:
5031         roundIncrement = zSign ? roundMask : 0;
5032         break;
5033     default:
5034         abort();
5035     }
5036     roundBits = zSig0 & roundMask;
5037     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
5038         if (    ( 0x7FFE < zExp )
5039              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
5040            ) {
5041             goto overflow;
5042         }
5043         if ( zExp <= 0 ) {
5044             if (status->flush_to_zero) {
5045                 float_raise(float_flag_output_denormal_flushed, status);
5046                 return packFloatx80(zSign, 0, 0);
5047             }
5048             isTiny = status->tininess_before_rounding
5049                   || (zExp < 0 )
5050                   || (zSig0 <= zSig0 + roundIncrement);
5051             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
5052             zExp = 0;
5053             roundBits = zSig0 & roundMask;
5054             if (isTiny && roundBits) {
5055                 float_raise(float_flag_underflow, status);
5056             }
5057             if (roundBits) {
5058                 float_raise(float_flag_inexact, status);
5059             }
5060             zSig0 += roundIncrement;
5061             if ( (int64_t) zSig0 < 0 ) zExp = 1;
5062             roundIncrement = roundMask + 1;
5063             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5064                 roundMask |= roundIncrement;
5065             }
5066             zSig0 &= ~ roundMask;
5067             return packFloatx80( zSign, zExp, zSig0 );
5068         }
5069     }
5070     if (roundBits) {
5071         float_raise(float_flag_inexact, status);
5072     }
5073     zSig0 += roundIncrement;
5074     if ( zSig0 < roundIncrement ) {
5075         ++zExp;
5076         zSig0 = UINT64_C(0x8000000000000000);
5077     }
5078     roundIncrement = roundMask + 1;
5079     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5080         roundMask |= roundIncrement;
5081     }
5082     zSig0 &= ~ roundMask;
5083     if ( zSig0 == 0 ) zExp = 0;
5084     return packFloatx80( zSign, zExp, zSig0 );
5085  precision80:
5086     switch (roundingMode) {
5087     case float_round_nearest_even:
5088     case float_round_ties_away:
5089         increment = ((int64_t)zSig1 < 0);
5090         break;
5091     case float_round_to_zero:
5092         increment = 0;
5093         break;
5094     case float_round_up:
5095         increment = !zSign && zSig1;
5096         break;
5097     case float_round_down:
5098         increment = zSign && zSig1;
5099         break;
5100     default:
5101         abort();
5102     }
5103     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
5104         if (    ( 0x7FFE < zExp )
5105              || (    ( zExp == 0x7FFE )
5106                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
5107                   && increment
5108                 )
5109            ) {
5110             roundMask = 0;
5111  overflow:
5112             float_raise(float_flag_overflow | float_flag_inexact, status);
5113             if (    ( roundingMode == float_round_to_zero )
5114                  || ( zSign && ( roundingMode == float_round_up ) )
5115                  || ( ! zSign && ( roundingMode == float_round_down ) )
5116                ) {
5117                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
5118             }
5119             return packFloatx80(zSign,
5120                                 floatx80_infinity_high,
5121                                 floatx80_infinity_low);
5122         }
5123         if ( zExp <= 0 ) {
5124             isTiny = status->tininess_before_rounding
5125                   || (zExp < 0)
5126                   || !increment
5127                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
5128             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
5129             zExp = 0;
5130             if (isTiny && zSig1) {
5131                 float_raise(float_flag_underflow, status);
5132             }
5133             if (zSig1) {
5134                 float_raise(float_flag_inexact, status);
5135             }
5136             switch (roundingMode) {
5137             case float_round_nearest_even:
5138             case float_round_ties_away:
5139                 increment = ((int64_t)zSig1 < 0);
5140                 break;
5141             case float_round_to_zero:
5142                 increment = 0;
5143                 break;
5144             case float_round_up:
5145                 increment = !zSign && zSig1;
5146                 break;
5147             case float_round_down:
5148                 increment = zSign && zSig1;
5149                 break;
5150             default:
5151                 abort();
5152             }
5153             if ( increment ) {
5154                 ++zSig0;
5155                 if (!(zSig1 << 1) && roundNearestEven) {
5156                     zSig0 &= ~1;
5157                 }
5158                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
5159             }
5160             return packFloatx80( zSign, zExp, zSig0 );
5161         }
5162     }
5163     if (zSig1) {
5164         float_raise(float_flag_inexact, status);
5165     }
5166     if ( increment ) {
5167         ++zSig0;
5168         if ( zSig0 == 0 ) {
5169             ++zExp;
5170             zSig0 = UINT64_C(0x8000000000000000);
5171         }
5172         else {
5173             if (!(zSig1 << 1) && roundNearestEven) {
5174                 zSig0 &= ~1;
5175             }
5176         }
5177     }
5178     else {
5179         if ( zSig0 == 0 ) zExp = 0;
5180     }
5181     return packFloatx80( zSign, zExp, zSig0 );
5182 
5183 }
5184 
5185 /*----------------------------------------------------------------------------
5186 | Takes an abstract floating-point value having sign `zSign', exponent
5187 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5188 | and returns the proper extended double-precision floating-point value
5189 | corresponding to the abstract input.  This routine is just like
5190 | `roundAndPackFloatx80' except that the input significand does not have to be
5191 | normalized.
5192 *----------------------------------------------------------------------------*/
5193 
5194 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5195                                        bool zSign, int32_t zExp,
5196                                        uint64_t zSig0, uint64_t zSig1,
5197                                        float_status *status)
5198 {
5199     int8_t shiftCount;
5200 
5201     if ( zSig0 == 0 ) {
5202         zSig0 = zSig1;
5203         zSig1 = 0;
5204         zExp -= 64;
5205     }
5206     shiftCount = clz64(zSig0);
5207     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5208     zExp -= shiftCount;
5209     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5210                                 zSig0, zSig1, status);
5211 
5212 }
5213 
5214 /*----------------------------------------------------------------------------
5215 | Returns the binary exponential of the single-precision floating-point value
5216 | `a'. The operation is performed according to the IEC/IEEE Standard for
5217 | Binary Floating-Point Arithmetic.
5218 |
5219 | Uses the following identities:
5220 |
5221 | 1. -------------------------------------------------------------------------
5222 |      x    x*ln(2)
5223 |     2  = e
5224 |
5225 | 2. -------------------------------------------------------------------------
5226 |                      2     3     4     5           n
5227 |      x        x     x     x     x     x           x
5228 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5229 |               1!    2!    3!    4!    5!          n!
5230 *----------------------------------------------------------------------------*/
5231 
5232 static const float64 float32_exp2_coefficients[15] =
5233 {
5234     const_float64( 0x3ff0000000000000ll ), /*  1 */
5235     const_float64( 0x3fe0000000000000ll ), /*  2 */
5236     const_float64( 0x3fc5555555555555ll ), /*  3 */
5237     const_float64( 0x3fa5555555555555ll ), /*  4 */
5238     const_float64( 0x3f81111111111111ll ), /*  5 */
5239     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5240     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5241     const_float64( 0x3efa01a01a01a01all ), /*  8 */
5242     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5243     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5244     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5245     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5246     const_float64( 0x3de6124613a86d09ll ), /* 13 */
5247     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5248     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5249 };
5250 
5251 float32 float32_exp2(float32 a, float_status *status)
5252 {
5253     FloatParts64 xp, xnp, tp, rp;
5254     int i;
5255 
5256     float32_unpack_canonical(&xp, a, status);
5257     if (unlikely(xp.cls != float_class_normal)) {
5258         switch (xp.cls) {
5259         case float_class_denormal:
5260             break;
5261         case float_class_snan:
5262         case float_class_qnan:
5263             parts_return_nan(&xp, status);
5264             return float32_round_pack_canonical(&xp, status);
5265         case float_class_inf:
5266             return xp.sign ? float32_zero : a;
5267         case float_class_zero:
5268             return float32_one;
5269         default:
5270             g_assert_not_reached();
5271         }
5272     }
5273 
5274     float_raise(float_flag_inexact, status);
5275 
5276     float64_unpack_canonical(&tp, float64_ln2, status);
5277     xp = *parts_mul(&xp, &tp, status);
5278     xnp = xp;
5279 
5280     float64_unpack_canonical(&rp, float64_one, status);
5281     for (i = 0 ; i < 15 ; i++) {
5282 
5283         float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
5284         rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status);
5285         xnp = *parts_mul(&xnp, &xp, status);
5286     }
5287 
5288     return float32_round_pack_canonical(&rp, status);
5289 }
5290 
5291 /*----------------------------------------------------------------------------
5292 | Rounds the extended double-precision floating-point value `a'
5293 | to the precision provided by floatx80_rounding_precision and returns the
5294 | result as an extended double-precision floating-point value.
5295 | The operation is performed according to the IEC/IEEE Standard for Binary
5296 | Floating-Point Arithmetic.
5297 *----------------------------------------------------------------------------*/
5298 
5299 floatx80 floatx80_round(floatx80 a, float_status *status)
5300 {
5301     FloatParts128 p;
5302 
5303     if (!floatx80_unpack_canonical(&p, a, status)) {
5304         return floatx80_default_nan(status);
5305     }
5306     return floatx80_round_pack_canonical(&p, status);
5307 }
5308 
5309 static void __attribute__((constructor)) softfloat_init(void)
5310 {
5311     union_float64 ua, ub, uc, ur;
5312 
5313     if (QEMU_NO_HARDFLOAT) {
5314         return;
5315     }
5316     /*
5317      * Test that the host's FMA is not obviously broken. For example,
5318      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5319      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5320      */
5321     ua.s = 0x0020000000000001ULL;
5322     ub.s = 0x3ca0000000000000ULL;
5323     uc.s = 0x0020000000000000ULL;
5324     ur.h = fma(ua.h, ub.h, uc.h);
5325     if (ur.s != 0x0020000000000001ULL) {
5326         force_soft_fma = true;
5327     }
5328 }
5329