/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the SoftFloat-2a license unless specifically
 * indicated otherwise.
 */

/*
===============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

/* softfloat (and in particular the code in softfloat-specialize.h) is
 * target-dependent and needs the TARGET_* macros.
 */
#include "qemu/osdep.h"
#include <math.h>
#include "qemu/bitops.h"
#include "fpu/softfloat.h"

/* We only need stdlib for abort() */

/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations.  (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
#include "fpu/softfloat-macros.h"

/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is neither fast nor pleasant to work with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016): 1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is < the minimum normal.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal_flushed, s);          \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK

#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3

/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif

static inline bool can_use_fpu(const float_status *s)
{
    if (QEMU_NO_HARDFLOAT) {
        return false;
    }
    return likely(s->float_exception_flags & float_flag_inexact &&
                  s->float_rounding_mode == float_round_nearest_even);
}

/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);

/* 2-input is-zero-or-normal */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}

static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}

/* 3-input is-zero-or-normal */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}

static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}

static inline bool f32_is_inf(union_float32 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float32_is_infinity(a.s);
}

static inline bool f64_is_inf(union_float64 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float64_is_infinity(a.s);
}

static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
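
/*
 * Illustrative sketch (helper names are assumptions, not part of this
 * excerpt): later in this file, concrete operations are wired through the
 * generators above roughly as follows, with soft_f32_add a softfloat
 * fallback and f32_addsub_post a post-condition check:
 *
 *   static float hard_f32_add(float a, float b)
 *   {
 *       return a + b;
 *   }
 *
 *   float32 QEMU_FLATTEN float32_add(float32 a, float32 b, float_status *s)
 *   {
 *       return float32_gen2(a, b, s, hard_f32_add, soft_f32_add,
 *                           f32_is_zon2, f32_addsub_post);
 *   }
 */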

/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN, so cls >= float_class_qnan tests for any NaN.
 *
 * Note that we canonicalize denormals, so most code should treat
 * class_normal and class_denormal identically.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_denormal, /* input was a non-squashed denormal */
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_denormal = float_cmask(float_class_denormal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
    float_cmask_anynorm = float_cmask_normal | float_cmask_denormal,
};
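
/*
 * Example (illustrative): classification masks let several operands be
 * tested at once.  A muladd implementation can combine the classes of
 * its inputs and compare against a mask:
 *
 *   int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 *   if (unlikely(ab_mask == float_cmask_infzero)) {
 *       ... 0 * Inf: invalid operation ...
 *   }
 */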

/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 8,
};
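
/*
 * Illustrative combinations (a sketch of how callers are expected to use
 * these flags): IEEE 754-2008 minNum() corresponds to
 * minmax_ismin | minmax_isnum, minNumMag() adds minmax_ismag, and
 * IEEE 754-2019 maximumNumber() corresponds to minmax_isnumber alone.
 */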

/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}

/*
 * Return true if the float_cmask has only normals in it
 * (including input denormals that were canonicalized)
 */
static inline bool cmask_is_only_normals(int cmask)
{
    return !(cmask & ~float_cmask_anynorm);
}

static inline bool is_anynorm(FloatClass c)
{
    return float_cmask(c) & float_cmask_anynorm;
}

/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
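
/*
 * For example (illustrative): a canonicalized 1.0 has exp == 0 and a
 * most significant fraction word equal to DECOMPOSED_IMPLICIT_BIT
 * (0x8000000000000000), i.e. the value is frac / 2^63 scaled by 2^exp.
 */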

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction:
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 *   m68k_denormal: explicit integer bit for extended precision may be 1
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_re_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    bool m68k_denormal;
    uint64_t round_mask;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_re_bias    = (1 << (E - 1)) + (1 << (E - 2)),  \
    .exp_max        = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1
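
/*
 * Worked example (illustrative): for float32, FLOAT_PARAMS(8, 23) yields
 * exp_bias = 127, exp_re_bias = 192, exp_max = 255,
 * frac_shift = (-24) & 63 = 40 and round_mask = (1ull << 40) - 1:
 * the 23-bit fraction sits just below DECOMPOSED_BINARY_POINT with
 * 40 rounding bits beneath it.
 */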

static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

#define FLOATX80_PARAMS(R)              \
    FLOAT_PARAMS_(15),                  \
    .frac_size = R == 64 ? 63 : R,      \
    .frac_shift = 0,                    \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = {
        FLOATX80_PARAMS(64),
#ifdef TARGET_M68K
        .m68k_denormal = true,
#endif
    },
};

/* Unpack a float to parts, but do not canonicalize.  */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}
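
/*
 * Example (illustrative): unpacking the float32 bit pattern 0xc0490fdb
 * (approximately -pi) with float32_params gives sign = 1, exp = 0x80
 * (still biased at this point) and frac = 0x490fdb (not yet normalized).
 */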

static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}

static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
{
    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract32(f.high, 15, 1),
        .exp = extract32(f.high, 0, 15),
        .frac_hi = f.low
    };
}

static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}

/* Pack a float from parts, but do not canonicalize.  */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}

static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}

static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}

/*----------------------------------------------------------------------------
| Functions and definitions to determine:  (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output.  These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"

#define PARTS_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME, \
                  FloatParts256 *: parts256_##NAME)
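
/*
 * For example (illustrative): given "FloatParts64 *p", a call such as
 * parts_return_nan(p, s) resolves via _Generic to parts64_return_nan(p, s)
 * at compile time; no function pointers or runtime dispatch are involved.
 */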

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
                                           FloatParts64 *c, int scale,
                                           int flags, float_status *s);
static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
                                             FloatParts128 *c, int scale,
                                             int flags, float_status *s);

#define parts_muladd_scalbn(A, B, C, Z, Y, S) \
    PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
                                    uint64_t *mod_quot, float_status *s);
static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
                                      uint64_t *mod_quot, float_status *s);

#define parts_modrem(A, B, Q, S) \
    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)

static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                      int scale, int64_t min, int64_t max,
                                      float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
                                            FloatRoundMode rmode,
                                            int bitsm1, float_status *s);
static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
                                             FloatRoundMode rmode,
                                             int bitsm1, float_status *s);

#define parts_float_to_sint_modulo(P, R, M, S) \
    PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
                                     float_status *s, bool q);
static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)

static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_log2(A, S, F) \
    PARTS_GENERIC_64_128(log2, A)(A, S, F)

/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME, \
                  FloatParts256 *: frac256_##NAME)

static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)

static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)

static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}

static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)

static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
{
    return (a->frac == b->frac ? float_relation_equal
            : a->frac < b->frac ? float_relation_less
            : float_relation_greater);
}

static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return float_relation_equal;
        }
    }
    return ta < tb ? float_relation_less : float_relation_greater;
}

#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)

static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)

static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}
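
/*
 * Worked example (illustrative): dividing 1.0 by 1.5 gives
 * A.frac = 0x8000000000000000 and B.frac = 0xc000000000000000, so
 * A.frac < B.frac and (A.frac << 64) / B.frac = 0xaaaaaaaaaaaaaaaa with
 * a non-zero remainder; the sticky lsb makes frac = 0xaaaaaaaaaaaaaaab,
 * and the returned true decrements the exponent, yielding 2/3.
 */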

static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)

static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}

static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}

#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)

static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)

static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)

static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}

static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}
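
/*
 * Example (illustrative): with frac_hi == 0 and frac_lo == 1,
 * frac128_normalize shifts by 63 + 64 bits, leaving frac_hi ==
 * DECOMPOSED_IMPLICIT_BIT and frac_lo == 0, and returns 127 so the
 * caller can adjust the exponent accordingly.
 */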

static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)

static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac;
    a1 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        a0 >>= 1;
        exp_diff = 0;
    }

    b0 = b->frac;
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac = a0 | (a1 != 0);
}

static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    a->frac_lo = a1 | (a2 != 0);
}

#define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)

static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)

static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)

static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}
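
/*
 * Example (illustrative): the "jam" preserves inexactness across the
 * shift.  frac64_shrjam on frac = 0x8000000000000001 with c = 1 yields
 * 0x4000000000000001: the discarded low bit is ORed back into the lsb
 * as a sticky bit, so later rounding still sees that bits were lost.
 */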

static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}

static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1558 
1559 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1560 {
1561     return usub64_overflow(a->frac, b->frac, &r->frac);
1562 }
1563 
1564 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1565 {
1566     bool c = 0;
1567     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1568     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1569     return c;
1570 }
1571 
1572 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1573 {
1574     bool c = 0;
1575     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1576     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1577     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1578     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1579     return c;
1580 }
1581 
1582 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1583 
1584 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1585 {
1586     r->frac = a->frac_hi | (a->frac_lo != 0);
1587 }
1588 
1589 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1590 {
1591     r->frac_hi = a->frac_hi;
1592     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1593 }
1594 
1595 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1596 
1597 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1598 {
1599     r->frac_hi = a->frac;
1600     r->frac_lo = 0;
1601 }
1602 
1603 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1604 {
1605     r->frac_hi = a->frac_hi;
1606     r->frac_hm = a->frac_lo;
1607     r->frac_lm = 0;
1608     r->frac_lo = 0;
1609 }
1610 
1611 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
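
/*
 * Note that frac_widen followed by frac_truncjam round-trips exactly
 * (the widened low limbs are zero), while the narrowing direction
 * folds any discarded low limbs into a sticky lsb, just as frac_shrjam
 * does for shifted-out bits.  floatx80_round_pack_canonical below uses
 * frac_truncjam this way when rounding to less than full precision.
 */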
1612 
1613 /*
1614  * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
1615  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1616  * and thus MIT licensed.
1617  */
1618 static const uint16_t rsqrt_tab[128] = {
1619     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1620     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1621     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1622     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1623     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1624     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1625     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1626     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1627     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1628     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1629     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1630     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1631     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1632     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1633     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1634     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1635 };
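
/*
 * Sketch of how such a table is typically consumed (the real consumer
 * is the sqrt code in softfloat-parts.c.inc, whose details may
 * differ).  Following musl, index with the exponent's low bit plus the
 * top six fraction bits, then refine the roughly 9-bit estimate
 * r ~= 1/sqrt(m) with Newton-Raphson steps, r' = r*(3 - m*r*r)/2,
 * each of which about doubles the number of good bits:
 *
 *     uint32_t i = (ix >> 46) % 128;     // 1 exp bit + 6 frac bits
 *     uint32_t r = (uint32_t)rsqrt_tab[i] << 16;
 *     // ...two or three NR iterations, then sqrt(m) = m * r
 */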
1636 
1637 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1638 #define FloatPartsN    glue(FloatParts,N)
1639 #define FloatPartsW    glue(FloatParts,W)
1640 
1641 #define N 64
1642 #define W 128
1643 
1644 #include "softfloat-parts-addsub.c.inc"
1645 #include "softfloat-parts.c.inc"
1646 
1647 #undef  N
1648 #undef  W
1649 #define N 128
1650 #define W 256
1651 
1652 #include "softfloat-parts-addsub.c.inc"
1653 #include "softfloat-parts.c.inc"
1654 
1655 #undef  N
1656 #undef  W
1657 #define N            256
1658 
1659 #include "softfloat-parts-addsub.c.inc"
1660 
1661 #undef  N
1662 #undef  W
1663 #undef  partsN
1664 #undef  FloatPartsN
1665 #undef  FloatPartsW
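
/*
 * The defines above stamp out the generic algorithms once per
 * fraction width, with W naming the wider type used for intermediate
 * products.  For example, with N == 64 the included
 *
 *     static FloatPartsN *partsN(mul)(...)
 *
 * expands via glue() into
 *
 *     static FloatParts64 *parts64_mul(...)
 *
 * which the PARTS_GENERIC_* dispatch macros then select by operand type.
 */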
1666 
1667 /*
1668  * Pack/unpack routines with a specific FloatFmt.
1669  */
1670 
1671 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1672                                       float_status *s, const FloatFmt *params)
1673 {
1674     float16_unpack_raw(p, f);
1675     parts_canonicalize(p, s, params);
1676 }
1677 
1678 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1679                                      float_status *s)
1680 {
1681     float16a_unpack_canonical(p, f, s, &float16_params);
1682 }
1683 
1684 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1685                                       float_status *s)
1686 {
1687     bfloat16_unpack_raw(p, f);
1688     parts_canonicalize(p, s, &bfloat16_params);
1689 }
1690 
1691 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1692                                              float_status *s,
1693                                              const FloatFmt *params)
1694 {
1695     parts_uncanon(p, s, params);
1696     return float16_pack_raw(p);
1697 }
1698 
1699 static float16 float16_round_pack_canonical(FloatParts64 *p,
1700                                             float_status *s)
1701 {
1702     return float16a_round_pack_canonical(p, s, &float16_params);
1703 }
1704 
1705 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1706                                               float_status *s)
1707 {
1708     parts_uncanon(p, s, &bfloat16_params);
1709     return bfloat16_pack_raw(p);
1710 }
1711 
1712 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1713                                      float_status *s)
1714 {
1715     float32_unpack_raw(p, f);
1716     parts_canonicalize(p, s, &float32_params);
1717 }
1718 
1719 static float32 float32_round_pack_canonical(FloatParts64 *p,
1720                                             float_status *s)
1721 {
1722     parts_uncanon(p, s, &float32_params);
1723     return float32_pack_raw(p);
1724 }
1725 
1726 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1727                                      float_status *s)
1728 {
1729     float64_unpack_raw(p, f);
1730     parts_canonicalize(p, s, &float64_params);
1731 }
1732 
1733 static float64 float64_round_pack_canonical(FloatParts64 *p,
1734                                             float_status *s)
1735 {
1736     parts_uncanon(p, s, &float64_params);
1737     return float64_pack_raw(p);
1738 }
1739 
1740 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1741                                                float_status *s)
1742 {
1743     parts_uncanon(p, s, &float32_params);
1744 
1745     /*
1746      * In parts_uncanon, we placed the fraction for float32 at the lsb.
1747      * We need to adjust the fraction higher so that the least N bits are
1748      * zero, and the fraction is adjacent to the float64 implicit bit.
1749      */
1750     switch (p->cls) {
1751     case float_class_normal:
1752     case float_class_denormal:
1753         if (unlikely(p->exp == 0)) {
1754             /*
1755              * The result is denormal for float32, but can be represented
1756              * in normalized form for float64.  Adjust, per canonicalize.
1757              */
1758             int shift = frac_normalize(p);
1759             p->exp = (float32_params.frac_shift -
1760                       float32_params.exp_bias - shift + 1 +
1761                       float64_params.exp_bias);
1762             frac_shr(p, float64_params.frac_shift);
1763         } else {
1764             frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1765             p->exp += float64_params.exp_bias - float32_params.exp_bias;
1766         }
1767         break;
1768     case float_class_snan:
1769     case float_class_qnan:
1770         frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1771         p->exp = float64_params.exp_max;
1772         break;
1773     case float_class_inf:
1774         p->exp = float64_params.exp_max;
1775         break;
1776     case float_class_zero:
1777         break;
1778     default:
1779         g_assert_not_reached();
1780     }
1781 
1782     return float64_pack_raw(p);
1783 }
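
/*
 * The float64r32 routines round to float32 range and precision but
 * deliver the result in float64 format; this matches, for example,
 * the PowerPC single-precision instructions that operate on 64-bit
 * registers.  A hypothetical target helper would use them directly:
 *
 *     ret = float64r32_add(frA, frB, &env->fp_status);
 */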
1784 
1785 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1786                                       float_status *s)
1787 {
1788     float128_unpack_raw(p, f);
1789     parts_canonicalize(p, s, &float128_params);
1790 }
1791 
1792 static float128 float128_round_pack_canonical(FloatParts128 *p,
1793                                               float_status *s)
1794 {
1795     parts_uncanon(p, s, &float128_params);
1796     return float128_pack_raw(p);
1797 }
1798 
1799 /* Returns false if the encoding is invalid. */
1800 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1801                                       float_status *s)
1802 {
1803     /* Ensure rounding precision is set before beginning. */
1804     switch (s->floatx80_rounding_precision) {
1805     case floatx80_precision_x:
1806     case floatx80_precision_d:
1807     case floatx80_precision_s:
1808         break;
1809     default:
1810         g_assert_not_reached();
1811     }
1812 
1813     if (unlikely(floatx80_invalid_encoding(f))) {
1814         float_raise(float_flag_invalid, s);
1815         return false;
1816     }
1817 
1818     floatx80_unpack_raw(p, f);
1819 
1820     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1821         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1822     } else {
1823         /* The explicit integer bit is ignored, after invalid checks. */
1824         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1825         p->cls = (p->frac_hi == 0 ? float_class_inf
1826                   : parts_is_snan_frac(p->frac_hi, s)
1827                   ? float_class_snan : float_class_qnan);
1828     }
1829     return true;
1830 }
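
/*
 * Reminder on the floatx80 layout, the only format here with an
 * explicit integer bit: bit 63 of the significand is the integer bit,
 * so a finite value is
 *
 *     (-1)^sign * (frac_hi / 2^63) * 2^(exp - 16383)
 *
 * Encodings whose integer bit is inconsistent with the exponent
 * ("unnormals" and the like) are what floatx80_invalid_encoding
 * rejects above.
 */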
1831 
1832 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1833                                               float_status *s)
1834 {
1835     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1836     uint64_t frac;
1837     int exp;
1838 
1839     switch (p->cls) {
1840     case float_class_normal:
1841     case float_class_denormal:
1842         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1843             parts_uncanon_normal(p, s, fmt);
1844             frac = p->frac_hi;
1845             exp = p->exp;
1846         } else {
1847             FloatParts64 p64;
1848 
1849             p64.sign = p->sign;
1850             p64.exp = p->exp;
1851             frac_truncjam(&p64, p);
1852             parts_uncanon_normal(&p64, s, fmt);
1853             frac = p64.frac;
1854             exp = p64.exp;
1855         }
1856         if (exp != fmt->exp_max) {
1857             break;
1858         }
1859         /* rounded to inf -- fall through to set frac correctly */
1860 
1861     case float_class_inf:
1862         /* x86 and m68k differ in the setting of the integer bit. */
1863         frac = floatx80_infinity_low;
1864         exp = fmt->exp_max;
1865         break;
1866 
1867     case float_class_zero:
1868         frac = 0;
1869         exp = 0;
1870         break;
1871 
1872     case float_class_snan:
1873     case float_class_qnan:
1874         /* NaNs have the integer bit set. */
1875         frac = p->frac_hi | (1ull << 63);
1876         exp = fmt->exp_max;
1877         break;
1878 
1879     default:
1880         g_assert_not_reached();
1881     }
1882 
1883     return packFloatx80(p->sign, exp, frac);
1884 }
1885 
1886 /*
1887  * Addition and subtraction
1888  */
1889 
1890 static float16 QEMU_FLATTEN
1891 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1892 {
1893     FloatParts64 pa, pb, *pr;
1894 
1895     float16_unpack_canonical(&pa, a, status);
1896     float16_unpack_canonical(&pb, b, status);
1897     pr = parts_addsub(&pa, &pb, status, subtract);
1898 
1899     return float16_round_pack_canonical(pr, status);
1900 }
1901 
1902 float16 float16_add(float16 a, float16 b, float_status *status)
1903 {
1904     return float16_addsub(a, b, status, false);
1905 }
1906 
1907 float16 float16_sub(float16 a, float16 b, float_status *status)
1908 {
1909     return float16_addsub(a, b, status, true);
1910 }
1911 
1912 static float32 QEMU_SOFTFLOAT_ATTR
1913 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1914 {
1915     FloatParts64 pa, pb, *pr;
1916 
1917     float32_unpack_canonical(&pa, a, status);
1918     float32_unpack_canonical(&pb, b, status);
1919     pr = parts_addsub(&pa, &pb, status, subtract);
1920 
1921     return float32_round_pack_canonical(pr, status);
1922 }
1923 
1924 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1925 {
1926     return soft_f32_addsub(a, b, status, false);
1927 }
1928 
1929 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1930 {
1931     return soft_f32_addsub(a, b, status, true);
1932 }
1933 
1934 static float64 QEMU_SOFTFLOAT_ATTR
1935 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1936 {
1937     FloatParts64 pa, pb, *pr;
1938 
1939     float64_unpack_canonical(&pa, a, status);
1940     float64_unpack_canonical(&pb, b, status);
1941     pr = parts_addsub(&pa, &pb, status, subtract);
1942 
1943     return float64_round_pack_canonical(pr, status);
1944 }
1945 
1946 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1947 {
1948     return soft_f64_addsub(a, b, status, false);
1949 }
1950 
1951 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1952 {
1953     return soft_f64_addsub(a, b, status, true);
1954 }
1955 
1956 static float hard_f32_add(float a, float b)
1957 {
1958     return a + b;
1959 }
1960 
1961 static float hard_f32_sub(float a, float b)
1962 {
1963     return a - b;
1964 }
1965 
1966 static double hard_f64_add(double a, double b)
1967 {
1968     return a + b;
1969 }
1970 
1971 static double hard_f64_sub(double a, double b)
1972 {
1973     return a - b;
1974 }
1975 
1976 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1977 {
1978     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1979         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1980     }
1981     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1982 }
1983 
1984 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1985 {
1986     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1987         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1988     } else {
1989         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1990     }
1991 }
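
/*
 * These *_post hooks are invoked by the float*_gen2 wrappers after the
 * hard-float operation, but only when the result magnitude is at or
 * below the smallest normal; returning true sends the operation back
 * to the soft path so that underflow and inexactness are tracked
 * precisely.  The one tiny result known to be exact in advance is the
 * one produced from two zero inputs, hence the check above.
 */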
1992 
1993 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1994                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1995 {
1996     return float32_gen2(a, b, s, hard, soft,
1997                         f32_is_zon2, f32_addsubmul_post);
1998 }
1999 
2000 static float64 float64_addsub(float64 a, float64 b, float_status *s,
2001                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
2002 {
2003     return float64_gen2(a, b, s, hard, soft,
2004                         f64_is_zon2, f64_addsubmul_post);
2005 }
2006 
2007 float32 QEMU_FLATTEN
2008 float32_add(float32 a, float32 b, float_status *s)
2009 {
2010     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
2011 }
2012 
2013 float32 QEMU_FLATTEN
2014 float32_sub(float32 a, float32 b, float_status *s)
2015 {
2016     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
2017 }
2018 
2019 float64 QEMU_FLATTEN
2020 float64_add(float64 a, float64 b, float_status *s)
2021 {
2022     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
2023 }
2024 
2025 float64 QEMU_FLATTEN
2026 float64_sub(float64 a, float64 b, float_status *s)
2027 {
2028     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
2029 }
2030 
2031 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
2032                                  bool subtract)
2033 {
2034     FloatParts64 pa, pb, *pr;
2035 
2036     float64_unpack_canonical(&pa, a, status);
2037     float64_unpack_canonical(&pb, b, status);
2038     pr = parts_addsub(&pa, &pb, status, subtract);
2039 
2040     return float64r32_round_pack_canonical(pr, status);
2041 }
2042 
2043 float64 float64r32_add(float64 a, float64 b, float_status *status)
2044 {
2045     return float64r32_addsub(a, b, status, false);
2046 }
2047 
2048 float64 float64r32_sub(float64 a, float64 b, float_status *status)
2049 {
2050     return float64r32_addsub(a, b, status, true);
2051 }
2052 
2053 static bfloat16 QEMU_FLATTEN
2054 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2055 {
2056     FloatParts64 pa, pb, *pr;
2057 
2058     bfloat16_unpack_canonical(&pa, a, status);
2059     bfloat16_unpack_canonical(&pb, b, status);
2060     pr = parts_addsub(&pa, &pb, status, subtract);
2061 
2062     return bfloat16_round_pack_canonical(pr, status);
2063 }
2064 
2065 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2066 {
2067     return bfloat16_addsub(a, b, status, false);
2068 }
2069 
2070 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2071 {
2072     return bfloat16_addsub(a, b, status, true);
2073 }
2074 
2075 static float128 QEMU_FLATTEN
2076 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2077 {
2078     FloatParts128 pa, pb, *pr;
2079 
2080     float128_unpack_canonical(&pa, a, status);
2081     float128_unpack_canonical(&pb, b, status);
2082     pr = parts_addsub(&pa, &pb, status, subtract);
2083 
2084     return float128_round_pack_canonical(pr, status);
2085 }
2086 
2087 float128 float128_add(float128 a, float128 b, float_status *status)
2088 {
2089     return float128_addsub(a, b, status, false);
2090 }
2091 
2092 float128 float128_sub(float128 a, float128 b, float_status *status)
2093 {
2094     return float128_addsub(a, b, status, true);
2095 }
2096 
2097 static floatx80 QEMU_FLATTEN
2098 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2099 {
2100     FloatParts128 pa, pb, *pr;
2101 
2102     if (!floatx80_unpack_canonical(&pa, a, status) ||
2103         !floatx80_unpack_canonical(&pb, b, status)) {
2104         return floatx80_default_nan(status);
2105     }
2106 
2107     pr = parts_addsub(&pa, &pb, status, subtract);
2108     return floatx80_round_pack_canonical(pr, status);
2109 }
2110 
2111 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2112 {
2113     return floatx80_addsub(a, b, status, false);
2114 }
2115 
2116 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2117 {
2118     return floatx80_addsub(a, b, status, true);
2119 }
2120 
2121 /*
2122  * Multiplication
2123  */
2124 
2125 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2126 {
2127     FloatParts64 pa, pb, *pr;
2128 
2129     float16_unpack_canonical(&pa, a, status);
2130     float16_unpack_canonical(&pb, b, status);
2131     pr = parts_mul(&pa, &pb, status);
2132 
2133     return float16_round_pack_canonical(pr, status);
2134 }
2135 
2136 static float32 QEMU_SOFTFLOAT_ATTR
2137 soft_f32_mul(float32 a, float32 b, float_status *status)
2138 {
2139     FloatParts64 pa, pb, *pr;
2140 
2141     float32_unpack_canonical(&pa, a, status);
2142     float32_unpack_canonical(&pb, b, status);
2143     pr = parts_mul(&pa, &pb, status);
2144 
2145     return float32_round_pack_canonical(pr, status);
2146 }
2147 
2148 static float64 QEMU_SOFTFLOAT_ATTR
2149 soft_f64_mul(float64 a, float64 b, float_status *status)
2150 {
2151     FloatParts64 pa, pb, *pr;
2152 
2153     float64_unpack_canonical(&pa, a, status);
2154     float64_unpack_canonical(&pb, b, status);
2155     pr = parts_mul(&pa, &pb, status);
2156 
2157     return float64_round_pack_canonical(pr, status);
2158 }
2159 
2160 static float hard_f32_mul(float a, float b)
2161 {
2162     return a * b;
2163 }
2164 
2165 static double hard_f64_mul(double a, double b)
2166 {
2167     return a * b;
2168 }
2169 
2170 float32 QEMU_FLATTEN
2171 float32_mul(float32 a, float32 b, float_status *s)
2172 {
2173     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2174                         f32_is_zon2, f32_addsubmul_post);
2175 }
2176 
2177 float64 QEMU_FLATTEN
2178 float64_mul(float64 a, float64 b, float_status *s)
2179 {
2180     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2181                         f64_is_zon2, f64_addsubmul_post);
2182 }
2183 
2184 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2185 {
2186     FloatParts64 pa, pb, *pr;
2187 
2188     float64_unpack_canonical(&pa, a, status);
2189     float64_unpack_canonical(&pb, b, status);
2190     pr = parts_mul(&pa, &pb, status);
2191 
2192     return float64r32_round_pack_canonical(pr, status);
2193 }
2194 
2195 bfloat16 QEMU_FLATTEN
2196 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2197 {
2198     FloatParts64 pa, pb, *pr;
2199 
2200     bfloat16_unpack_canonical(&pa, a, status);
2201     bfloat16_unpack_canonical(&pb, b, status);
2202     pr = parts_mul(&pa, &pb, status);
2203 
2204     return bfloat16_round_pack_canonical(pr, status);
2205 }
2206 
2207 float128 QEMU_FLATTEN
2208 float128_mul(float128 a, float128 b, float_status *status)
2209 {
2210     FloatParts128 pa, pb, *pr;
2211 
2212     float128_unpack_canonical(&pa, a, status);
2213     float128_unpack_canonical(&pb, b, status);
2214     pr = parts_mul(&pa, &pb, status);
2215 
2216     return float128_round_pack_canonical(pr, status);
2217 }
2218 
2219 floatx80 QEMU_FLATTEN
2220 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2221 {
2222     FloatParts128 pa, pb, *pr;
2223 
2224     if (!floatx80_unpack_canonical(&pa, a, status) ||
2225         !floatx80_unpack_canonical(&pb, b, status)) {
2226         return floatx80_default_nan(status);
2227     }
2228 
2229     pr = parts_mul(&pa, &pb, status);
2230     return floatx80_round_pack_canonical(pr, status);
2231 }
2232 
2233 /*
2234  * Fused multiply-add
2235  */
2236 
2237 float16 QEMU_FLATTEN
2238 float16_muladd_scalbn(float16 a, float16 b, float16 c,
2239                       int scale, int flags, float_status *status)
2240 {
2241     FloatParts64 pa, pb, pc, *pr;
2242 
2243     float16_unpack_canonical(&pa, a, status);
2244     float16_unpack_canonical(&pb, b, status);
2245     float16_unpack_canonical(&pc, c, status);
2246     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2247 
2248     return float16_round_pack_canonical(pr, status);
2249 }
2250 
2251 float16 float16_muladd(float16 a, float16 b, float16 c,
2252                        int flags, float_status *status)
2253 {
2254     return float16_muladd_scalbn(a, b, c, 0, flags, status);
2255 }
2256 
2257 float32 QEMU_SOFTFLOAT_ATTR
2258 float32_muladd_scalbn(float32 a, float32 b, float32 c,
2259                       int scale, int flags, float_status *status)
2260 {
2261     FloatParts64 pa, pb, pc, *pr;
2262 
2263     float32_unpack_canonical(&pa, a, status);
2264     float32_unpack_canonical(&pb, b, status);
2265     float32_unpack_canonical(&pc, c, status);
2266     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2267 
2268     return float32_round_pack_canonical(pr, status);
2269 }
2270 
2271 float64 QEMU_SOFTFLOAT_ATTR
2272 float64_muladd_scalbn(float64 a, float64 b, float64 c,
2273                       int scale, int flags, float_status *status)
2274 {
2275     FloatParts64 pa, pb, pc, *pr;
2276 
2277     float64_unpack_canonical(&pa, a, status);
2278     float64_unpack_canonical(&pb, b, status);
2279     float64_unpack_canonical(&pc, c, status);
2280     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2281 
2282     return float64_round_pack_canonical(pr, status);
2283 }
2284 
2285 static bool force_soft_fma;
2286 
2287 float32 QEMU_FLATTEN
2288 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2289 {
2290     union_float32 ua, ub, uc, ur;
2291 
2292     ua.s = xa;
2293     ub.s = xb;
2294     uc.s = xc;
2295 
2296     if (unlikely(!can_use_fpu(s))) {
2297         goto soft;
2298     }
2299     if (unlikely(flags & float_muladd_suppress_add_product_zero)) {
2300         goto soft;
2301     }
2302 
2303     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2304     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2305         goto soft;
2306     }
2307 
2308     if (unlikely(force_soft_fma)) {
2309         goto soft;
2310     }
2311 
2312     /*
2313      * When (a || b) == 0, there's no need to check for under/over flow,
2314      * since we know the addend is (normal || 0) and the product is 0.
2315      */
2316     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2317         union_float32 up;
2318         bool prod_sign;
2319 
2320         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2321         prod_sign ^= !!(flags & float_muladd_negate_product);
2322         up.s = float32_set_sign(float32_zero, prod_sign);
2323 
2324         if (flags & float_muladd_negate_c) {
2325             uc.h = -uc.h;
2326         }
2327         ur.h = up.h + uc.h;
2328     } else {
2329         union_float32 ua_orig = ua;
2330         union_float32 uc_orig = uc;
2331 
2332         if (flags & float_muladd_negate_product) {
2333             ua.h = -ua.h;
2334         }
2335         if (flags & float_muladd_negate_c) {
2336             uc.h = -uc.h;
2337         }
2338 
2339         ur.h = fmaf(ua.h, ub.h, uc.h);
2340 
2341         if (unlikely(f32_is_inf(ur))) {
2342             float_raise(float_flag_overflow, s);
2343         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2344             ua = ua_orig;
2345             uc = uc_orig;
2346             goto soft;
2347         }
2348     }
2349     if (flags & float_muladd_negate_result) {
2350         return float32_chs(ur.s);
2351     }
2352     return ur.s;
2353 
2354  soft:
2355     return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
2356 }
2357 
2358 float64 QEMU_FLATTEN
2359 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2360 {
2361     union_float64 ua, ub, uc, ur;
2362 
2363     ua.s = xa;
2364     ub.s = xb;
2365     uc.s = xc;
2366 
2367     if (unlikely(!can_use_fpu(s))) {
2368         goto soft;
2369     }
2370 
2371     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2372     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2373         goto soft;
2374     }
2375 
2376     if (unlikely(force_soft_fma)) {
2377         goto soft;
2378     }
2379 
2380     /*
2381      * When (a || b) == 0, there's no need to check for under/over flow,
2382      * since we know the addend is (normal || 0) and the product is 0.
2383      */
2384     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2385         union_float64 up;
2386         bool prod_sign;
2387 
2388         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2389         prod_sign ^= !!(flags & float_muladd_negate_product);
2390         up.s = float64_set_sign(float64_zero, prod_sign);
2391 
2392         if (flags & float_muladd_negate_c) {
2393             uc.h = -uc.h;
2394         }
2395         ur.h = up.h + uc.h;
2396     } else {
2397         union_float64 ua_orig = ua;
2398         union_float64 uc_orig = uc;
2399 
2400         if (flags & float_muladd_negate_product) {
2401             ua.h = -ua.h;
2402         }
2403         if (flags & float_muladd_negate_c) {
2404             uc.h = -uc.h;
2405         }
2406 
2407         ur.h = fma(ua.h, ub.h, uc.h);
2408 
2409         if (unlikely(f64_is_inf(ur))) {
2410             float_raise(float_flag_overflow, s);
2411         } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
2412             ua = ua_orig;
2413             uc = uc_orig;
2414             goto soft;
2415         }
2416     }
2417     if (flags & float_muladd_negate_result) {
2418         return float64_chs(ur.s);
2419     }
2420     return ur.s;
2421 
2422  soft:
2423     return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
2424 }
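
/*
 * Note on flag handling in the two fast paths above: hardfloat is only
 * entered once the guest's inexact flag is already set (part of what
 * can_use_fpu checks), so the host result needs no inexact reporting.
 * An infinite result is an unambiguous overflow and is flagged
 * directly, but a result at or below the smallest normal may have
 * underflowed in ways only softfloat reports precisely, hence the
 * fabsf(ur.h) <= FLT_MIN and fabs(ur.h) <= DBL_MIN retries.
 */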
2425 
2426 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2427                           int flags, float_status *status)
2428 {
2429     FloatParts64 pa, pb, pc, *pr;
2430 
2431     float64_unpack_canonical(&pa, a, status);
2432     float64_unpack_canonical(&pb, b, status);
2433     float64_unpack_canonical(&pc, c, status);
2434     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2435 
2436     return float64r32_round_pack_canonical(pr, status);
2437 }
2438 
2439 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2440                                       int flags, float_status *status)
2441 {
2442     FloatParts64 pa, pb, pc, *pr;
2443 
2444     bfloat16_unpack_canonical(&pa, a, status);
2445     bfloat16_unpack_canonical(&pb, b, status);
2446     bfloat16_unpack_canonical(&pc, c, status);
2447     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2448 
2449     return bfloat16_round_pack_canonical(pr, status);
2450 }
2451 
2452 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2453                                       int flags, float_status *status)
2454 {
2455     FloatParts128 pa, pb, pc, *pr;
2456 
2457     float128_unpack_canonical(&pa, a, status);
2458     float128_unpack_canonical(&pb, b, status);
2459     float128_unpack_canonical(&pc, c, status);
2460     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2461 
2462     return float128_round_pack_canonical(pr, status);
2463 }
2464 
2465 /*
2466  * Division
2467  */
2468 
2469 float16 float16_div(float16 a, float16 b, float_status *status)
2470 {
2471     FloatParts64 pa, pb, *pr;
2472 
2473     float16_unpack_canonical(&pa, a, status);
2474     float16_unpack_canonical(&pb, b, status);
2475     pr = parts_div(&pa, &pb, status);
2476 
2477     return float16_round_pack_canonical(pr, status);
2478 }
2479 
2480 static float32 QEMU_SOFTFLOAT_ATTR
2481 soft_f32_div(float32 a, float32 b, float_status *status)
2482 {
2483     FloatParts64 pa, pb, *pr;
2484 
2485     float32_unpack_canonical(&pa, a, status);
2486     float32_unpack_canonical(&pb, b, status);
2487     pr = parts_div(&pa, &pb, status);
2488 
2489     return float32_round_pack_canonical(pr, status);
2490 }
2491 
2492 static float64 QEMU_SOFTFLOAT_ATTR
2493 soft_f64_div(float64 a, float64 b, float_status *status)
2494 {
2495     FloatParts64 pa, pb, *pr;
2496 
2497     float64_unpack_canonical(&pa, a, status);
2498     float64_unpack_canonical(&pb, b, status);
2499     pr = parts_div(&pa, &pb, status);
2500 
2501     return float64_round_pack_canonical(pr, status);
2502 }
2503 
2504 static float hard_f32_div(float a, float b)
2505 {
2506     return a / b;
2507 }
2508 
2509 static double hard_f64_div(double a, double b)
2510 {
2511     return a / b;
2512 }
2513 
2514 static bool f32_div_pre(union_float32 a, union_float32 b)
2515 {
2516     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2517         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2518                fpclassify(b.h) == FP_NORMAL;
2519     }
2520     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2521 }
2522 
2523 static bool f64_div_pre(union_float64 a, union_float64 b)
2524 {
2525     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2526         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2527                fpclassify(b.h) == FP_NORMAL;
2528     }
2529     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2530 }
2531 
2532 static bool f32_div_post(union_float32 a, union_float32 b)
2533 {
2534     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2535         return fpclassify(a.h) != FP_ZERO;
2536     }
2537     return !float32_is_zero(a.s);
2538 }
2539 
2540 static bool f64_div_post(union_float64 a, union_float64 b)
2541 {
2542     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2543         return fpclassify(a.h) != FP_ZERO;
2544     }
2545     return !float64_is_zero(a.s);
2546 }
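
/*
 * Division's hooks differ from those of add/sub/mul: the divisor must
 * be normal up front, since a zero or subnormal b needs the soft
 * path's divide-by-zero and denormal handling, and a tiny quotient is
 * redone in soft float unless the dividend itself was zero, in which
 * case the zero result is known exact.
 */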
2547 
2548 float32 QEMU_FLATTEN
2549 float32_div(float32 a, float32 b, float_status *s)
2550 {
2551     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2552                         f32_div_pre, f32_div_post);
2553 }
2554 
2555 float64 QEMU_FLATTEN
2556 float64_div(float64 a, float64 b, float_status *s)
2557 {
2558     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2559                         f64_div_pre, f64_div_post);
2560 }
2561 
2562 float64 float64r32_div(float64 a, float64 b, float_status *status)
2563 {
2564     FloatParts64 pa, pb, *pr;
2565 
2566     float64_unpack_canonical(&pa, a, status);
2567     float64_unpack_canonical(&pb, b, status);
2568     pr = parts_div(&pa, &pb, status);
2569 
2570     return float64r32_round_pack_canonical(pr, status);
2571 }
2572 
2573 bfloat16 QEMU_FLATTEN
2574 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2575 {
2576     FloatParts64 pa, pb, *pr;
2577 
2578     bfloat16_unpack_canonical(&pa, a, status);
2579     bfloat16_unpack_canonical(&pb, b, status);
2580     pr = parts_div(&pa, &pb, status);
2581 
2582     return bfloat16_round_pack_canonical(pr, status);
2583 }
2584 
2585 float128 QEMU_FLATTEN
2586 float128_div(float128 a, float128 b, float_status *status)
2587 {
2588     FloatParts128 pa, pb, *pr;
2589 
2590     float128_unpack_canonical(&pa, a, status);
2591     float128_unpack_canonical(&pb, b, status);
2592     pr = parts_div(&pa, &pb, status);
2593 
2594     return float128_round_pack_canonical(pr, status);
2595 }
2596 
2597 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2598 {
2599     FloatParts128 pa, pb, *pr;
2600 
2601     if (!floatx80_unpack_canonical(&pa, a, status) ||
2602         !floatx80_unpack_canonical(&pb, b, status)) {
2603         return floatx80_default_nan(status);
2604     }
2605 
2606     pr = parts_div(&pa, &pb, status);
2607     return floatx80_round_pack_canonical(pr, status);
2608 }
2609 
2610 /*
2611  * Remainder
2612  */
2613 
2614 float32 float32_rem(float32 a, float32 b, float_status *status)
2615 {
2616     FloatParts64 pa, pb, *pr;
2617 
2618     float32_unpack_canonical(&pa, a, status);
2619     float32_unpack_canonical(&pb, b, status);
2620     pr = parts_modrem(&pa, &pb, NULL, status);
2621 
2622     return float32_round_pack_canonical(pr, status);
2623 }
2624 
2625 float64 float64_rem(float64 a, float64 b, float_status *status)
2626 {
2627     FloatParts64 pa, pb, *pr;
2628 
2629     float64_unpack_canonical(&pa, a, status);
2630     float64_unpack_canonical(&pb, b, status);
2631     pr = parts_modrem(&pa, &pb, NULL, status);
2632 
2633     return float64_round_pack_canonical(pr, status);
2634 }
2635 
2636 float128 float128_rem(float128 a, float128 b, float_status *status)
2637 {
2638     FloatParts128 pa, pb, *pr;
2639 
2640     float128_unpack_canonical(&pa, a, status);
2641     float128_unpack_canonical(&pb, b, status);
2642     pr = parts_modrem(&pa, &pb, NULL, status);
2643 
2644     return float128_round_pack_canonical(pr, status);
2645 }
2646 
2647 /*
2648  * Returns the remainder of the extended double-precision floating-point value
2649  * `a' with respect to the corresponding value `b'.
2650  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2651  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2652  * the remainder based on truncating the quotient toward zero instead and
2653  * *quotient is set to the low 64 bits of the absolute value of the integer
2654  * quotient.
2655  */
2656 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2657                          uint64_t *quotient, float_status *status)
2658 {
2659     FloatParts128 pa, pb, *pr;
2660 
2661     *quotient = 0;
2662     if (!floatx80_unpack_canonical(&pa, a, status) ||
2663         !floatx80_unpack_canonical(&pb, b, status)) {
2664         return floatx80_default_nan(status);
2665     }
2666     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2667 
2668     return floatx80_round_pack_canonical(pr, status);
2669 }
2670 
2671 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2672 {
2673     uint64_t quotient;
2674     return floatx80_modrem(a, b, false, &quotient, status);
2675 }
2676 
2677 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2678 {
2679     uint64_t quotient;
2680     return floatx80_modrem(a, b, true, &quotient, status);
2681 }
2682 
2683 /*
2684  * Float to Float conversions
2685  *
2686  * Returns the result of converting one float format to another. The
2687  * conversion is performed according to the IEC/IEEE Standard for
2688  * Binary Floating-Point Arithmetic.
2689  *
2690  * Usually this only needs to take care of raising invalid exceptions
2691  * and handling the conversion on NaNs.
2692  */
2693 
2694 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2695 {
2696     switch (a->cls) {
2697     case float_class_snan:
2698         float_raise(float_flag_invalid_snan, s);
2699         /* fall through */
2700     case float_class_qnan:
2701         /*
2702          * There is no NaN in the destination format.  Raise Invalid
2703          * and return a zero with the sign of the input NaN.
2704          */
2705         float_raise(float_flag_invalid, s);
2706         a->cls = float_class_zero;
2707         break;
2708 
2709     case float_class_inf:
2710         /*
2711          * There is no Inf in the destination format.  Raise Invalid
2712          * and return the maximum normal with the correct sign.
2713          */
2714         float_raise(float_flag_invalid, s);
2715         a->cls = float_class_normal;
2716         a->exp = float16_params_ahp.exp_max;
2717         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2718                                   float16_params_ahp.frac_size + 1);
2719         break;
2720 
2721     case float_class_denormal:
2722         float_raise(float_flag_input_denormal_used, s);
2723         break;
2724     case float_class_normal:
2725     case float_class_zero:
2726         break;
2727 
2728     default:
2729         g_assert_not_reached();
2730     }
2731 }
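
/*
 * ARM's "alternative half precision" trades the Inf and NaN encodings
 * for extra range: exp_max encodes an ordinary normal exponent, so the
 * largest AHP value -- which the inf case above constructs bit by bit
 * -- is (2 - 2^-10) * 2^16 = 131008.
 */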
2732 
2733 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2734 {
2735     if (is_nan(a->cls)) {
2736         parts_return_nan(a, s);
2737     }
2738     if (a->cls == float_class_denormal) {
2739         float_raise(float_flag_input_denormal_used, s);
2740     }
2741 }
2742 
2743 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2744 {
2745     if (is_nan(a->cls)) {
2746         parts_return_nan(a, s);
2747     }
2748     if (a->cls == float_class_denormal) {
2749         float_raise(float_flag_input_denormal_used, s);
2750     }
2751 }
2752 
2753 #define parts_float_to_float(P, S) \
2754     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2755 
2756 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2757                                         float_status *s)
2758 {
2759     a->cls = b->cls;
2760     a->sign = b->sign;
2761     a->exp = b->exp;
2762 
2763     switch (a->cls) {
2764     case float_class_denormal:
2765         float_raise(float_flag_input_denormal_used, s);
2766         /* fall through */
2767     case float_class_normal:
2768         frac_truncjam(a, b);
2769         break;
2770     case float_class_snan:
2771     case float_class_qnan:
2772         /* Discard the low bits of the NaN. */
2773         a->frac = b->frac_hi;
2774         parts_return_nan(a, s);
2775         break;
2776     default:
2777         break;
2778     }
2779 }
2780 
2781 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2782                                        float_status *s)
2783 {
2784     a->cls = b->cls;
2785     a->sign = b->sign;
2786     a->exp = b->exp;
2787     frac_widen(a, b);
2788 
2789     if (is_nan(a->cls)) {
2790         parts_return_nan(a, s);
2791     }
2792     if (a->cls == float_class_denormal) {
2793         float_raise(float_flag_input_denormal_used, s);
2794     }
2795 }
2796 
2797 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2798 {
2799     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2800     FloatParts64 p;
2801 
2802     float16a_unpack_canonical(&p, a, s, fmt16);
2803     parts_float_to_float(&p, s);
2804     return float32_round_pack_canonical(&p, s);
2805 }
2806 
2807 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2808 {
2809     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2810     FloatParts64 p;
2811 
2812     float16a_unpack_canonical(&p, a, s, fmt16);
2813     parts_float_to_float(&p, s);
2814     return float64_round_pack_canonical(&p, s);
2815 }
2816 
2817 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2818 {
2819     FloatParts64 p;
2820     const FloatFmt *fmt;
2821 
2822     float32_unpack_canonical(&p, a, s);
2823     if (ieee) {
2824         parts_float_to_float(&p, s);
2825         fmt = &float16_params;
2826     } else {
2827         parts_float_to_ahp(&p, s);
2828         fmt = &float16_params_ahp;
2829     }
2830     return float16a_round_pack_canonical(&p, s, fmt);
2831 }
2832 
2833 static float64 QEMU_SOFTFLOAT_ATTR
2834 soft_float32_to_float64(float32 a, float_status *s)
2835 {
2836     FloatParts64 p;
2837 
2838     float32_unpack_canonical(&p, a, s);
2839     parts_float_to_float(&p, s);
2840     return float64_round_pack_canonical(&p, s);
2841 }
2842 
2843 float64 float32_to_float64(float32 a, float_status *s)
2844 {
2845     if (likely(float32_is_normal(a))) {
2846         /* Widening conversion can never produce inexact results.  */
2847         union_float32 uf;
2848         union_float64 ud;
2849         uf.s = a;
2850         ud.h = uf.h;
2851         return ud.s;
2852     } else if (float32_is_zero(a)) {
2853         return float64_set_sign(float64_zero, float32_is_neg(a));
2854     } else {
2855         return soft_float32_to_float64(a, s);
2856     }
2857 }
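
/*
 * The fast path above is sound because float64 is a strict superset of
 * float32: a 24-bit significand always fits in 53 bits, and the
 * exponent range [-126, 127] sits inside [-1022, 1023], so the host's
 * widening cast is exact and raises no flags.  Zeroes fail the
 * float32_is_normal test but are equally trivial, hence the separate
 * early return; subnormals, Inf and NaN take the soft path.
 */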
2858 
2859 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2860 {
2861     FloatParts64 p;
2862     const FloatFmt *fmt;
2863 
2864     float64_unpack_canonical(&p, a, s);
2865     if (ieee) {
2866         parts_float_to_float(&p, s);
2867         fmt = &float16_params;
2868     } else {
2869         parts_float_to_ahp(&p, s);
2870         fmt = &float16_params_ahp;
2871     }
2872     return float16a_round_pack_canonical(&p, s, fmt);
2873 }
2874 
2875 float32 float64_to_float32(float64 a, float_status *s)
2876 {
2877     FloatParts64 p;
2878 
2879     float64_unpack_canonical(&p, a, s);
2880     parts_float_to_float(&p, s);
2881     return float32_round_pack_canonical(&p, s);
2882 }
2883 
2884 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2885 {
2886     FloatParts64 p;
2887 
2888     bfloat16_unpack_canonical(&p, a, s);
2889     parts_float_to_float(&p, s);
2890     return float32_round_pack_canonical(&p, s);
2891 }
2892 
2893 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2894 {
2895     FloatParts64 p;
2896 
2897     bfloat16_unpack_canonical(&p, a, s);
2898     parts_float_to_float(&p, s);
2899     return float64_round_pack_canonical(&p, s);
2900 }
2901 
2902 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2903 {
2904     FloatParts64 p;
2905 
2906     float32_unpack_canonical(&p, a, s);
2907     parts_float_to_float(&p, s);
2908     return bfloat16_round_pack_canonical(&p, s);
2909 }
2910 
2911 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2912 {
2913     FloatParts64 p;
2914 
2915     float64_unpack_canonical(&p, a, s);
2916     parts_float_to_float(&p, s);
2917     return bfloat16_round_pack_canonical(&p, s);
2918 }
2919 
2920 float32 float128_to_float32(float128 a, float_status *s)
2921 {
2922     FloatParts64 p64;
2923     FloatParts128 p128;
2924 
2925     float128_unpack_canonical(&p128, a, s);
2926     parts_float_to_float_narrow(&p64, &p128, s);
2927     return float32_round_pack_canonical(&p64, s);
2928 }
2929 
2930 float64 float128_to_float64(float128 a, float_status *s)
2931 {
2932     FloatParts64 p64;
2933     FloatParts128 p128;
2934 
2935     float128_unpack_canonical(&p128, a, s);
2936     parts_float_to_float_narrow(&p64, &p128, s);
2937     return float64_round_pack_canonical(&p64, s);
2938 }
2939 
2940 float128 float32_to_float128(float32 a, float_status *s)
2941 {
2942     FloatParts64 p64;
2943     FloatParts128 p128;
2944 
2945     float32_unpack_canonical(&p64, a, s);
2946     parts_float_to_float_widen(&p128, &p64, s);
2947     return float128_round_pack_canonical(&p128, s);
2948 }
2949 
2950 float128 float64_to_float128(float64 a, float_status *s)
2951 {
2952     FloatParts64 p64;
2953     FloatParts128 p128;
2954 
2955     float64_unpack_canonical(&p64, a, s);
2956     parts_float_to_float_widen(&p128, &p64, s);
2957     return float128_round_pack_canonical(&p128, s);
2958 }
2959 
2960 float32 floatx80_to_float32(floatx80 a, float_status *s)
2961 {
2962     FloatParts64 p64;
2963     FloatParts128 p128;
2964 
2965     if (floatx80_unpack_canonical(&p128, a, s)) {
2966         parts_float_to_float_narrow(&p64, &p128, s);
2967     } else {
2968         parts_default_nan(&p64, s);
2969     }
2970     return float32_round_pack_canonical(&p64, s);
2971 }
2972 
2973 float64 floatx80_to_float64(floatx80 a, float_status *s)
2974 {
2975     FloatParts64 p64;
2976     FloatParts128 p128;
2977 
2978     if (floatx80_unpack_canonical(&p128, a, s)) {
2979         parts_float_to_float_narrow(&p64, &p128, s);
2980     } else {
2981         parts_default_nan(&p64, s);
2982     }
2983     return float64_round_pack_canonical(&p64, s);
2984 }
2985 
2986 float128 floatx80_to_float128(floatx80 a, float_status *s)
2987 {
2988     FloatParts128 p;
2989 
2990     if (floatx80_unpack_canonical(&p, a, s)) {
2991         parts_float_to_float(&p, s);
2992     } else {
2993         parts_default_nan(&p, s);
2994     }
2995     return float128_round_pack_canonical(&p, s);
2996 }
2997 
2998 floatx80 float32_to_floatx80(float32 a, float_status *s)
2999 {
3000     FloatParts64 p64;
3001     FloatParts128 p128;
3002 
3003     float32_unpack_canonical(&p64, a, s);
3004     parts_float_to_float_widen(&p128, &p64, s);
3005     return floatx80_round_pack_canonical(&p128, s);
3006 }
3007 
3008 floatx80 float64_to_floatx80(float64 a, float_status *s)
3009 {
3010     FloatParts64 p64;
3011     FloatParts128 p128;
3012 
3013     float64_unpack_canonical(&p64, a, s);
3014     parts_float_to_float_widen(&p128, &p64, s);
3015     return floatx80_round_pack_canonical(&p128, s);
3016 }
3017 
3018 floatx80 float128_to_floatx80(float128 a, float_status *s)
3019 {
3020     FloatParts128 p;
3021 
3022     float128_unpack_canonical(&p, a, s);
3023     parts_float_to_float(&p, s);
3024     return floatx80_round_pack_canonical(&p, s);
3025 }
3026 
3027 /*
3028  * Round to integral value
3029  */
3030 
3031 float16 float16_round_to_int(float16 a, float_status *s)
3032 {
3033     FloatParts64 p;
3034 
3035     float16_unpack_canonical(&p, a, s);
3036     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
3037     return float16_round_pack_canonical(&p, s);
3038 }
3039 
3040 float32 float32_round_to_int(float32 a, float_status *s)
3041 {
3042     FloatParts64 p;
3043 
3044     float32_unpack_canonical(&p, a, s);
3045     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
3046     return float32_round_pack_canonical(&p, s);
3047 }
3048 
3049 float64 float64_round_to_int(float64 a, float_status *s)
3050 {
3051     FloatParts64 p;
3052 
3053     float64_unpack_canonical(&p, a, s);
3054     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
3055     return float64_round_pack_canonical(&p, s);
3056 }
3057 
3058 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
3059 {
3060     FloatParts64 p;
3061 
3062     bfloat16_unpack_canonical(&p, a, s);
3063     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
3064     return bfloat16_round_pack_canonical(&p, s);
3065 }
3066 
3067 float128 float128_round_to_int(float128 a, float_status *s)
3068 {
3069     FloatParts128 p;
3070 
3071     float128_unpack_canonical(&p, a, s);
3072     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3073     return float128_round_pack_canonical(&p, s);
3074 }
3075 
3076 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3077 {
3078     FloatParts128 p;
3079 
3080     if (!floatx80_unpack_canonical(&p, a, status)) {
3081         return floatx80_default_nan(status);
3082     }
3083 
3084     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3085                        &floatx80_params[status->floatx80_rounding_precision]);
3086     return floatx80_round_pack_canonical(&p, status);
3087 }
3088 
3089 /*
3090  * Floating-point to signed integer conversions
3091  */
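
/*
 * The *_scalbn variants multiply the input by 2**scale before
 * converting, which is how targets implement float-to-fixed-point
 * conversions.  An illustrative (hypothetical) use, producing a Q24.8
 * fixed-point value:
 *
 *     int32_t q24_8 = float32_to_int32_scalbn(f, float_round_to_zero,
 *                                             8, &env->fp_status);
 */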
3092 
3093 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3094                               float_status *s)
3095 {
3096     FloatParts64 p;
3097 
3098     float16_unpack_canonical(&p, a, s);
3099     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3100 }
3101 
3102 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3103                                 float_status *s)
3104 {
3105     FloatParts64 p;
3106 
3107     float16_unpack_canonical(&p, a, s);
3108     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3109 }
3110 
3111 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3112                                 float_status *s)
3113 {
3114     FloatParts64 p;
3115 
3116     float16_unpack_canonical(&p, a, s);
3117     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3118 }
3119 
3120 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3121                                 float_status *s)
3122 {
3123     FloatParts64 p;
3124 
3125     float16_unpack_canonical(&p, a, s);
3126     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3127 }
3128 
3129 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3130                                 float_status *s)
3131 {
3132     FloatParts64 p;
3133 
3134     float32_unpack_canonical(&p, a, s);
3135     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3136 }
3137 
3138 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3139                                 float_status *s)
3140 {
3141     FloatParts64 p;
3142 
3143     float32_unpack_canonical(&p, a, s);
3144     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3145 }
3146 
3147 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3148                                 float_status *s)
3149 {
3150     FloatParts64 p;
3151 
3152     float32_unpack_canonical(&p, a, s);
3153     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3154 }
3155 
3156 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3157                                 float_status *s)
3158 {
3159     FloatParts64 p;
3160 
3161     float64_unpack_canonical(&p, a, s);
3162     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3163 }
3164 
3165 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3166                                 float_status *s)
3167 {
3168     FloatParts64 p;
3169 
3170     float64_unpack_canonical(&p, a, s);
3171     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3172 }
3173 
3174 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3175                                 float_status *s)
3176 {
3177     FloatParts64 p;
3178 
3179     float64_unpack_canonical(&p, a, s);
3180     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3181 }
3182 
3183 int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3184                                float_status *s)
3185 {
3186     FloatParts64 p;
3187 
3188     bfloat16_unpack_canonical(&p, a, s);
3189     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3190 }
3191 
3192 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3193                                  float_status *s)
3194 {
3195     FloatParts64 p;
3196 
3197     bfloat16_unpack_canonical(&p, a, s);
3198     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3199 }
3200 
3201 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3202                                  float_status *s)
3203 {
3204     FloatParts64 p;
3205 
3206     bfloat16_unpack_canonical(&p, a, s);
3207     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3208 }
3209 
3210 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3211                                  float_status *s)
3212 {
3213     FloatParts64 p;
3214 
3215     bfloat16_unpack_canonical(&p, a, s);
3216     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3217 }
3218 
3219 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3220                                         int scale, float_status *s)
3221 {
3222     FloatParts128 p;
3223 
3224     float128_unpack_canonical(&p, a, s);
3225     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3226 }
3227 
3228 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3229                                         int scale, float_status *s)
3230 {
3231     FloatParts128 p;
3232 
3233     float128_unpack_canonical(&p, a, s);
3234     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3235 }
3236 
3237 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3238                                         int scale, float_status *s)
3239 {
3240     int flags = 0;
3241     Int128 r;
3242     FloatParts128 p;
3243 
3244     float128_unpack_canonical(&p, a, s);
3245 
3246     switch (p.cls) {
3247     case float_class_snan:
3248         flags |= float_flag_invalid_snan;
3249         /* fall through */
3250     case float_class_qnan:
3251         flags |= float_flag_invalid;
3252         r = UINT128_MAX;
3253         break;
3254 
3255     case float_class_inf:
3256         flags = float_flag_invalid | float_flag_invalid_cvti;
3257         r = p.sign ? INT128_MIN : INT128_MAX;
3258         break;
3259 
3260     case float_class_zero:
3261         return int128_zero();
3262 
3263     case float_class_normal:
3264     case float_class_denormal:
3265         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3266             flags = float_flag_inexact;
3267         }
3268 
3269         if (p.exp < 127) {
3270             int shift = 127 - p.exp;
3271             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3272             if (p.sign) {
3273                 r = int128_neg(r);
3274             }
3275         } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3276                    p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3277             r = INT128_MIN;
3278         } else {
3279             flags = float_flag_invalid | float_flag_invalid_cvti;
3280             r = p.sign ? INT128_MIN : INT128_MAX;
3281         }
3282         break;
3283 
3284     default:
3285         g_assert_not_reached();
3286     }
3287 
3288     float_raise(flags, s);
3289     return r;
3290 }
3291 
3292 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3293                                         int scale, float_status *s)
3294 {
3295     FloatParts128 p;
3296 
3297     if (!floatx80_unpack_canonical(&p, a, s)) {
3298         parts_default_nan(&p, s);
3299     }
3300     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3301 }
3302 
3303 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3304                                         int scale, float_status *s)
3305 {
3306     FloatParts128 p;
3307 
3308     if (!floatx80_unpack_canonical(&p, a, s)) {
3309         parts_default_nan(&p, s);
3310     }
3311     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3312 }
3313 
3314 int8_t float16_to_int8(float16 a, float_status *s)
3315 {
3316     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3317 }
3318 
3319 int16_t float16_to_int16(float16 a, float_status *s)
3320 {
3321     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3322 }
3323 
3324 int32_t float16_to_int32(float16 a, float_status *s)
3325 {
3326     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3327 }
3328 
3329 int64_t float16_to_int64(float16 a, float_status *s)
3330 {
3331     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3332 }
3333 
3334 int16_t float32_to_int16(float32 a, float_status *s)
3335 {
3336     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3337 }
3338 
3339 int32_t float32_to_int32(float32 a, float_status *s)
3340 {
3341     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3342 }
3343 
3344 int64_t float32_to_int64(float32 a, float_status *s)
3345 {
3346     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3347 }
3348 
3349 int16_t float64_to_int16(float64 a, float_status *s)
3350 {
3351     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3352 }
3353 
3354 int32_t float64_to_int32(float64 a, float_status *s)
3355 {
3356     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3357 }
3358 
3359 int64_t float64_to_int64(float64 a, float_status *s)
3360 {
3361     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3362 }
3363 
3364 int32_t float128_to_int32(float128 a, float_status *s)
3365 {
3366     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3367 }
3368 
3369 int64_t float128_to_int64(float128 a, float_status *s)
3370 {
3371     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3372 }
3373 
3374 Int128 float128_to_int128(float128 a, float_status *s)
3375 {
3376     return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3377 }
3378 
3379 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3380 {
3381     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3382 }
3383 
3384 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3385 {
3386     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3387 }
3388 
3389 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3390 {
3391     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3392 }
3393 
3394 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3395 {
3396     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3397 }
3398 
3399 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3400 {
3401     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3402 }
3403 
3404 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3405 {
3406     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3407 }
3408 
3409 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3410 {
3411     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3412 }
3413 
3414 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3415 {
3416     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3417 }
3418 
3419 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3420 {
3421     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3422 }
3423 
3424 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3425 {
3426     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3427 }
3428 
3429 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3430 {
3431     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3432 }
3433 
3434 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3435 {
3436     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3437 }
3438 
3439 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3440 {
3441     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3442 }
3443 
3444 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3445 {
3446     return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3447 }
3448 
3449 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3450 {
3451     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3452 }
3453 
3454 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3455 {
3456     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3457 }
3458 
3459 int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
3460 {
3461     return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3462 }
3463 
3464 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3465 {
3466     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3467 }
3468 
3469 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3470 {
3471     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3472 }
3473 
3474 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3475 {
3476     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3477 }
3478 
3479 int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
3480 {
3481     return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
3482 }
3483 
3484 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3485 {
3486     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3487 }
3488 
3489 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3490 {
3491     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3492 }
3493 
3494 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3495 {
3496     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3497 }
3498 
3499 int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
3500                                 float_status *s)
3501 {
3502     FloatParts64 p;
3503 
3504     float64_unpack_canonical(&p, a, s);
3505     return parts_float_to_sint_modulo(&p, rmode, 31, s);
3506 }
3507 
3508 int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
3509                                 float_status *s)
3510 {
3511     FloatParts64 p;
3512 
3513     float64_unpack_canonical(&p, a, s);
3514     return parts_float_to_sint_modulo(&p, rmode, 63, s);
3515 }
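
/*
 * Illustrative sketch (hypothetical helper): unlike the saturating
 * conversions above, the _modulo variants yield the low-order bits of
 * the exact integer result when it overflows -- the behaviour wanted
 * by e.g. Arm's FJCVTZS instruction.
 */
static inline int32_t example_fjcvtzs_like(float64 a, float_status *s)
{
    return float64_to_int32_modulo(a, float_round_to_zero, s);
}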
3516 
3517 /*
3518  * Floating-point to unsigned integer conversions
3519  */
3520 
3521 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3522                                 float_status *s)
3523 {
3524     FloatParts64 p;
3525 
3526     float16_unpack_canonical(&p, a, s);
3527     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3528 }
3529 
3530 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3531                                   float_status *s)
3532 {
3533     FloatParts64 p;
3534 
3535     float16_unpack_canonical(&p, a, s);
3536     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3537 }
3538 
3539 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3540                                   float_status *s)
3541 {
3542     FloatParts64 p;
3543 
3544     float16_unpack_canonical(&p, a, s);
3545     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3546 }
3547 
3548 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3549                                   float_status *s)
3550 {
3551     FloatParts64 p;
3552 
3553     float16_unpack_canonical(&p, a, s);
3554     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3555 }
3556 
3557 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3558                                   float_status *s)
3559 {
3560     FloatParts64 p;
3561 
3562     float32_unpack_canonical(&p, a, s);
3563     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3564 }
3565 
3566 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3567                                   float_status *s)
3568 {
3569     FloatParts64 p;
3570 
3571     float32_unpack_canonical(&p, a, s);
3572     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3573 }
3574 
3575 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3576                                   float_status *s)
3577 {
3578     FloatParts64 p;
3579 
3580     float32_unpack_canonical(&p, a, s);
3581     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3582 }
3583 
3584 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3585                                   float_status *s)
3586 {
3587     FloatParts64 p;
3588 
3589     float64_unpack_canonical(&p, a, s);
3590     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3591 }
3592 
3593 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3594                                   float_status *s)
3595 {
3596     FloatParts64 p;
3597 
3598     float64_unpack_canonical(&p, a, s);
3599     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3600 }
3601 
3602 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3603                                   float_status *s)
3604 {
3605     FloatParts64 p;
3606 
3607     float64_unpack_canonical(&p, a, s);
3608     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3609 }
3610 
3611 uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
3612                                  int scale, float_status *s)
3613 {
3614     FloatParts64 p;
3615 
3616     bfloat16_unpack_canonical(&p, a, s);
3617     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3618 }
3619 
3620 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3621                                    int scale, float_status *s)
3622 {
3623     FloatParts64 p;
3624 
3625     bfloat16_unpack_canonical(&p, a, s);
3626     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3627 }
3628 
3629 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3630                                    int scale, float_status *s)
3631 {
3632     FloatParts64 p;
3633 
3634     bfloat16_unpack_canonical(&p, a, s);
3635     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3636 }
3637 
3638 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3639                                    int scale, float_status *s)
3640 {
3641     FloatParts64 p;
3642 
3643     bfloat16_unpack_canonical(&p, a, s);
3644     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3645 }
3646 
3647 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3648                                           int scale, float_status *s)
3649 {
3650     FloatParts128 p;
3651 
3652     float128_unpack_canonical(&p, a, s);
3653     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3654 }
3655 
3656 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3657                                           int scale, float_status *s)
3658 {
3659     FloatParts128 p;
3660 
3661     float128_unpack_canonical(&p, a, s);
3662     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3663 }
3664 
3665 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3666                                          int scale, float_status *s)
3667 {
3668     int flags = 0;
3669     Int128 r;
3670     FloatParts128 p;
3671 
3672     float128_unpack_canonical(&p, a, s);
3673 
3674     switch (p.cls) {
3675     case float_class_snan:
3676         flags |= float_flag_invalid_snan;
3677         /* fall through */
3678     case float_class_qnan:
3679         flags |= float_flag_invalid;
3680         r = UINT128_MAX;
3681         break;
3682 
3683     case float_class_inf:
3684         flags = float_flag_invalid | float_flag_invalid_cvti;
3685         r = p.sign ? int128_zero() : UINT128_MAX;
3686         break;
3687 
3688     case float_class_zero:
3689         return int128_zero();
3690 
3691     case float_class_normal:
3692     case float_class_denormal:
3693         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3694             flags = float_flag_inexact;
3695             if (p.cls == float_class_zero) {
3696                 r = int128_zero();
3697                 break;
3698             }
3699         }
3700 
3701         if (p.sign) {
3702             flags = float_flag_invalid | float_flag_invalid_cvti;
3703             r = int128_zero();
3704         } else if (p.exp <= 127) {
3705             int shift = 127 - p.exp;
3706             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3707         } else {
3708             flags = float_flag_invalid | float_flag_invalid_cvti;
3709             r = UINT128_MAX;
3710         }
3711         break;
3712 
3713     default:
3714         g_assert_not_reached();
3715     }
3716 
3717     float_raise(flags, s);
3718     return r;
3719 }
3720 
3721 uint8_t float16_to_uint8(float16 a, float_status *s)
3722 {
3723     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3724 }
3725 
3726 uint16_t float16_to_uint16(float16 a, float_status *s)
3727 {
3728     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3729 }
3730 
3731 uint32_t float16_to_uint32(float16 a, float_status *s)
3732 {
3733     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3734 }
3735 
3736 uint64_t float16_to_uint64(float16 a, float_status *s)
3737 {
3738     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3739 }
3740 
3741 uint16_t float32_to_uint16(float32 a, float_status *s)
3742 {
3743     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3744 }
3745 
3746 uint32_t float32_to_uint32(float32 a, float_status *s)
3747 {
3748     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3749 }
3750 
3751 uint64_t float32_to_uint64(float32 a, float_status *s)
3752 {
3753     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3754 }
3755 
3756 uint16_t float64_to_uint16(float64 a, float_status *s)
3757 {
3758     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3759 }
3760 
3761 uint32_t float64_to_uint32(float64 a, float_status *s)
3762 {
3763     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3764 }
3765 
3766 uint64_t float64_to_uint64(float64 a, float_status *s)
3767 {
3768     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3769 }
3770 
3771 uint32_t float128_to_uint32(float128 a, float_status *s)
3772 {
3773     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3774 }
3775 
3776 uint64_t float128_to_uint64(float128 a, float_status *s)
3777 {
3778     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3779 }
3780 
3781 Int128 float128_to_uint128(float128 a, float_status *s)
3782 {
3783     return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3784 }
3785 
3786 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3787 {
3788     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3789 }
3790 
3791 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3792 {
3793     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3794 }
3795 
3796 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3797 {
3798     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3799 }
3800 
3801 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3802 {
3803     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3804 }
3805 
3806 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3807 {
3808     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3809 }
3810 
3811 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3812 {
3813     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3814 }
3815 
3816 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3817 {
3818     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3819 }
3820 
3821 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3822 {
3823     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3824 }
3825 
3826 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3827 {
3828     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3829 }
3830 
3831 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3832 {
3833     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3834 }
3835 
3836 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3837 {
3838     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3839 }
3840 
3841 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3842 {
3843     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3844 }
3845 
3846 uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
3847 {
3848     return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3849 }
3850 
3851 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3852 {
3853     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3854 }
3855 
3856 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3857 {
3858     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3859 }
3860 
3861 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3862 {
3863     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3864 }
3865 
3866 uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
3867 {
3868     return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
3869 }
3870 
3871 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3872 {
3873     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3874 }
3875 
3876 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3877 {
3878     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3879 }
3880 
3881 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3882 {
3883     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3884 }
3885 
3886 /*
3887  * Signed integer to floating-point conversions
3888  */
3889 
3890 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3891 {
3892     FloatParts64 p;
3893 
3894     parts_sint_to_float(&p, a, scale, status);
3895     return float16_round_pack_canonical(&p, status);
3896 }
3897 
3898 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3899 {
3900     return int64_to_float16_scalbn(a, scale, status);
3901 }
3902 
3903 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3904 {
3905     return int64_to_float16_scalbn(a, scale, status);
3906 }
3907 
3908 float16 int64_to_float16(int64_t a, float_status *status)
3909 {
3910     return int64_to_float16_scalbn(a, 0, status);
3911 }
3912 
3913 float16 int32_to_float16(int32_t a, float_status *status)
3914 {
3915     return int64_to_float16_scalbn(a, 0, status);
3916 }
3917 
3918 float16 int16_to_float16(int16_t a, float_status *status)
3919 {
3920     return int64_to_float16_scalbn(a, 0, status);
3921 }
3922 
3923 float16 int8_to_float16(int8_t a, float_status *status)
3924 {
3925     return int64_to_float16_scalbn(a, 0, status);
3926 }
3927 
3928 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3929 {
3930     FloatParts64 p;
3931 
3932     /* Without scaling, there are no overflow concerns. */
3933     if (likely(scale == 0) && can_use_fpu(status)) {
3934         union_float32 ur;
3935         ur.h = a;
3936         return ur.s;
3937     }
3938 
3939     parts64_sint_to_float(&p, a, scale, status);
3940     return float32_round_pack_canonical(&p, status);
3941 }
3942 
3943 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3944 {
3945     return int64_to_float32_scalbn(a, scale, status);
3946 }
3947 
3948 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3949 {
3950     return int64_to_float32_scalbn(a, scale, status);
3951 }
3952 
3953 float32 int64_to_float32(int64_t a, float_status *status)
3954 {
3955     return int64_to_float32_scalbn(a, 0, status);
3956 }
3957 
3958 float32 int32_to_float32(int32_t a, float_status *status)
3959 {
3960     return int64_to_float32_scalbn(a, 0, status);
3961 }
3962 
3963 float32 int16_to_float32(int16_t a, float_status *status)
3964 {
3965     return int64_to_float32_scalbn(a, 0, status);
3966 }
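
/*
 * Illustrative sketch (hypothetical helper): a negative 'scale'
 * divides by 2**|scale|, so a Q15 fixed-point int16 converts with
 * scale = -15.  The result is exact: at most 16 significant bits fit
 * easily in float32's 24-bit significand.
 */
static inline float32 example_q15_to_float32(int16_t a, float_status *s)
{
    return int16_to_float32_scalbn(a, -15, s);
}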
3967 
3968 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3969 {
3970     FloatParts64 p;
3971 
3972     /* Without scaling, there are no overflow concerns. */
3973     if (likely(scale == 0) && can_use_fpu(status)) {
3974         union_float64 ur;
3975         ur.h = a;
3976         return ur.s;
3977     }
3978 
3979     parts_sint_to_float(&p, a, scale, status);
3980     return float64_round_pack_canonical(&p, status);
3981 }
3982 
3983 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3984 {
3985     return int64_to_float64_scalbn(a, scale, status);
3986 }
3987 
3988 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3989 {
3990     return int64_to_float64_scalbn(a, scale, status);
3991 }
3992 
3993 float64 int64_to_float64(int64_t a, float_status *status)
3994 {
3995     return int64_to_float64_scalbn(a, 0, status);
3996 }
3997 
3998 float64 int32_to_float64(int32_t a, float_status *status)
3999 {
4000     return int64_to_float64_scalbn(a, 0, status);
4001 }
4002 
4003 float64 int16_to_float64(int16_t a, float_status *status)
4004 {
4005     return int64_to_float64_scalbn(a, 0, status);
4006 }
4007 
4008 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
4009 {
4010     FloatParts64 p;
4011 
4012     parts_sint_to_float(&p, a, scale, status);
4013     return bfloat16_round_pack_canonical(&p, status);
4014 }
4015 
4016 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
4017 {
4018     return int64_to_bfloat16_scalbn(a, scale, status);
4019 }
4020 
4021 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
4022 {
4023     return int64_to_bfloat16_scalbn(a, scale, status);
4024 }
4025 
4026 bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
4027 {
4028     return int64_to_bfloat16_scalbn(a, scale, status);
4029 }
4030 
4031 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
4032 {
4033     return int64_to_bfloat16_scalbn(a, 0, status);
4034 }
4035 
4036 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
4037 {
4038     return int64_to_bfloat16_scalbn(a, 0, status);
4039 }
4040 
4041 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
4042 {
4043     return int64_to_bfloat16_scalbn(a, 0, status);
4044 }
4045 
4046 bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
4047 {
4048     return int64_to_bfloat16_scalbn(a, 0, status);
4049 }
4050 
4051 float128 int128_to_float128(Int128 a, float_status *status)
4052 {
4053     FloatParts128 p = { };
4054     int shift;
4055 
4056     if (int128_nz(a)) {
4057         p.cls = float_class_normal;
4058         if (!int128_nonneg(a)) {
4059             p.sign = true;
4060             a = int128_neg(a);
4061         }
4062 
4063         shift = clz64(int128_gethi(a));
4064         if (shift == 64) {
4065             shift += clz64(int128_getlo(a));
4066         }
4067 
4068         p.exp = 127 - shift;
4069         a = int128_lshift(a, shift);
4070 
4071         p.frac_hi = int128_gethi(a);
4072         p.frac_lo = int128_getlo(a);
4073     } else {
4074         p.cls = float_class_zero;
4075     }
4076 
4077     return float128_round_pack_canonical(&p, status);
4078 }
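
/*
 * Worked example (illustrative): a = 1 has clz64(hi) = 64 and
 * clz64(lo) = 63, so shift = 127, p.exp = 0, and after the shift
 * frac_hi = 0x8000000000000000, frac_lo = 0 -- the canonical
 * representation of 1.0.
 */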
4079 
4080 float128 int64_to_float128(int64_t a, float_status *status)
4081 {
4082     FloatParts128 p;
4083 
4084     parts_sint_to_float(&p, a, 0, status);
4085     return float128_round_pack_canonical(&p, status);
4086 }
4087 
4088 float128 int32_to_float128(int32_t a, float_status *status)
4089 {
4090     return int64_to_float128(a, status);
4091 }
4092 
4093 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4094 {
4095     FloatParts128 p;
4096 
4097     parts_sint_to_float(&p, a, 0, status);
4098     return floatx80_round_pack_canonical(&p, status);
4099 }
4100 
4101 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4102 {
4103     return int64_to_floatx80(a, status);
4104 }
4105 
4106 /*
4107  * Unsigned integer to floating-point conversions
4108  */
4109 
4110 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
4111 {
4112     FloatParts64 p;
4113 
4114     parts_uint_to_float(&p, a, scale, status);
4115     return float16_round_pack_canonical(&p, status);
4116 }
4117 
4118 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
4119 {
4120     return uint64_to_float16_scalbn(a, scale, status);
4121 }
4122 
4123 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
4124 {
4125     return uint64_to_float16_scalbn(a, scale, status);
4126 }
4127 
4128 float16 uint64_to_float16(uint64_t a, float_status *status)
4129 {
4130     return uint64_to_float16_scalbn(a, 0, status);
4131 }
4132 
4133 float16 uint32_to_float16(uint32_t a, float_status *status)
4134 {
4135     return uint64_to_float16_scalbn(a, 0, status);
4136 }
4137 
4138 float16 uint16_to_float16(uint16_t a, float_status *status)
4139 {
4140     return uint64_to_float16_scalbn(a, 0, status);
4141 }
4142 
4143 float16 uint8_to_float16(uint8_t a, float_status *status)
4144 {
4145     return uint64_to_float16_scalbn(a, 0, status);
4146 }
4147 
4148 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4149 {
4150     FloatParts64 p;
4151 
4152     /* Without scaling, there are no overflow concerns. */
4153     if (likely(scale == 0) && can_use_fpu(status)) {
4154         union_float32 ur;
4155         ur.h = a;
4156         return ur.s;
4157     }
4158 
4159     parts_uint_to_float(&p, a, scale, status);
4160     return float32_round_pack_canonical(&p, status);
4161 }
4162 
4163 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4164 {
4165     return uint64_to_float32_scalbn(a, scale, status);
4166 }
4167 
4168 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4169 {
4170     return uint64_to_float32_scalbn(a, scale, status);
4171 }
4172 
4173 float32 uint64_to_float32(uint64_t a, float_status *status)
4174 {
4175     return uint64_to_float32_scalbn(a, 0, status);
4176 }
4177 
4178 float32 uint32_to_float32(uint32_t a, float_status *status)
4179 {
4180     return uint64_to_float32_scalbn(a, 0, status);
4181 }
4182 
4183 float32 uint16_to_float32(uint16_t a, float_status *status)
4184 {
4185     return uint64_to_float32_scalbn(a, 0, status);
4186 }
4187 
4188 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4189 {
4190     FloatParts64 p;
4191 
4192     /* Without scaling, there are no overflow concerns. */
4193     if (likely(scale == 0) && can_use_fpu(status)) {
4194         union_float64 ur;
4195         ur.h = a;
4196         return ur.s;
4197     }
4198 
4199     parts_uint_to_float(&p, a, scale, status);
4200     return float64_round_pack_canonical(&p, status);
4201 }
4202 
4203 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4204 {
4205     return uint64_to_float64_scalbn(a, scale, status);
4206 }
4207 
4208 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4209 {
4210     return uint64_to_float64_scalbn(a, scale, status);
4211 }
4212 
4213 float64 uint64_to_float64(uint64_t a, float_status *status)
4214 {
4215     return uint64_to_float64_scalbn(a, 0, status);
4216 }
4217 
4218 float64 uint32_to_float64(uint32_t a, float_status *status)
4219 {
4220     return uint64_to_float64_scalbn(a, 0, status);
4221 }
4222 
4223 float64 uint16_to_float64(uint16_t a, float_status *status)
4224 {
4225     return uint64_to_float64_scalbn(a, 0, status);
4226 }
4227 
4228 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4229 {
4230     FloatParts64 p;
4231 
4232     parts_uint_to_float(&p, a, scale, status);
4233     return bfloat16_round_pack_canonical(&p, status);
4234 }
4235 
4236 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4237 {
4238     return uint64_to_bfloat16_scalbn(a, scale, status);
4239 }
4240 
4241 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4242 {
4243     return uint64_to_bfloat16_scalbn(a, scale, status);
4244 }
4245 
4246 bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
4247 {
4248     return uint64_to_bfloat16_scalbn(a, scale, status);
4249 }
4250 
4251 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4252 {
4253     return uint64_to_bfloat16_scalbn(a, 0, status);
4254 }
4255 
4256 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4257 {
4258     return uint64_to_bfloat16_scalbn(a, 0, status);
4259 }
4260 
4261 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4262 {
4263     return uint64_to_bfloat16_scalbn(a, 0, status);
4264 }
4265 
4266 bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
4267 {
4268     return uint64_to_bfloat16_scalbn(a, 0, status);
4269 }
4270 
4271 float128 uint64_to_float128(uint64_t a, float_status *status)
4272 {
4273     FloatParts128 p;
4274 
4275     parts_uint_to_float(&p, a, 0, status);
4276     return float128_round_pack_canonical(&p, status);
4277 }
4278 
4279 float128 uint128_to_float128(Int128 a, float_status *status)
4280 {
4281     FloatParts128 p = { };
4282     int shift;
4283 
4284     if (int128_nz(a)) {
4285         p.cls = float_class_normal;
4286 
4287         shift = clz64(int128_gethi(a));
4288         if (shift == 64) {
4289             shift += clz64(int128_getlo(a));
4290         }
4291 
4292         p.exp = 127 - shift;
4293         a = int128_lshift(a, shift);
4294 
4295         p.frac_hi = int128_gethi(a);
4296         p.frac_lo = int128_getlo(a);
4297     } else {
4298         p.cls = float_class_zero;
4299     }
4300 
4301     return float128_round_pack_canonical(&p, status);
4302 }
4303 
4304 /*
4305  * Minimum and maximum
4306  */
4307 
4308 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4309 {
4310     FloatParts64 pa, pb, *pr;
4311 
4312     float16_unpack_canonical(&pa, a, s);
4313     float16_unpack_canonical(&pb, b, s);
4314     pr = parts_minmax(&pa, &pb, s, flags);
4315 
4316     return float16_round_pack_canonical(pr, s);
4317 }
4318 
4319 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4320                                 float_status *s, int flags)
4321 {
4322     FloatParts64 pa, pb, *pr;
4323 
4324     bfloat16_unpack_canonical(&pa, a, s);
4325     bfloat16_unpack_canonical(&pb, b, s);
4326     pr = parts_minmax(&pa, &pb, s, flags);
4327 
4328     return bfloat16_round_pack_canonical(pr, s);
4329 }
4330 
4331 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4332 {
4333     FloatParts64 pa, pb, *pr;
4334 
4335     float32_unpack_canonical(&pa, a, s);
4336     float32_unpack_canonical(&pb, b, s);
4337     pr = parts_minmax(&pa, &pb, s, flags);
4338 
4339     return float32_round_pack_canonical(pr, s);
4340 }
4341 
4342 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4343 {
4344     FloatParts64 pa, pb, *pr;
4345 
4346     float64_unpack_canonical(&pa, a, s);
4347     float64_unpack_canonical(&pb, b, s);
4348     pr = parts_minmax(&pa, &pb, s, flags);
4349 
4350     return float64_round_pack_canonical(pr, s);
4351 }
4352 
4353 static float128 float128_minmax(float128 a, float128 b,
4354                                 float_status *s, int flags)
4355 {
4356     FloatParts128 pa, pb, *pr;
4357 
4358     float128_unpack_canonical(&pa, a, s);
4359     float128_unpack_canonical(&pb, b, s);
4360     pr = parts_minmax(&pa, &pb, s, flags);
4361 
4362     return float128_round_pack_canonical(pr, s);
4363 }
4364 
4365 #define MINMAX_1(type, name, flags) \
4366     type type##_##name(type a, type b, float_status *s) \
4367     { return type##_minmax(a, b, s, flags); }
4368 
4369 #define MINMAX_2(type) \
4370     MINMAX_1(type, max, 0)                                                \
4371     MINMAX_1(type, maxnum, minmax_isnum)                                  \
4372     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4373     MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4374     MINMAX_1(type, min, minmax_ismin)                                     \
4375     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4376     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4377     MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4378 
4379 MINMAX_2(float16)
4380 MINMAX_2(bfloat16)
4381 MINMAX_2(float32)
4382 MINMAX_2(float64)
4383 MINMAX_2(float128)
4384 
4385 #undef MINMAX_1
4386 #undef MINMAX_2
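
/*
 * For reference (illustrative): MINMAX_1(float32, minnum,
 * minmax_ismin | minmax_isnum) above expands to
 *
 *   float32 float32_minnum(float32 a, float32 b, float_status *s)
 *   { return float32_minmax(a, b, s, minmax_ismin | minmax_isnum); }
 */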
4387 
4388 /*
4389  * Floating-point compare
4390  */
4391 
4392 static FloatRelation QEMU_FLATTEN
4393 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4394 {
4395     FloatParts64 pa, pb;
4396 
4397     float16_unpack_canonical(&pa, a, s);
4398     float16_unpack_canonical(&pb, b, s);
4399     return parts_compare(&pa, &pb, s, is_quiet);
4400 }
4401 
4402 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4403 {
4404     return float16_do_compare(a, b, s, false);
4405 }
4406 
4407 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4408 {
4409     return float16_do_compare(a, b, s, true);
4410 }
4411 
4412 static FloatRelation QEMU_SOFTFLOAT_ATTR
4413 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4414 {
4415     FloatParts64 pa, pb;
4416 
4417     float32_unpack_canonical(&pa, a, s);
4418     float32_unpack_canonical(&pb, b, s);
4419     return parts_compare(&pa, &pb, s, is_quiet);
4420 }
4421 
4422 static FloatRelation QEMU_FLATTEN
4423 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4424 {
4425     union_float32 ua, ub;
4426 
4427     ua.s = xa;
4428     ub.s = xb;
4429 
4430     if (QEMU_NO_HARDFLOAT) {
4431         goto soft;
4432     }
4433 
4434     if (unlikely(float32_is_denormal(ua.s) || float32_is_denormal(ub.s))) {
4435         /* We may need to set the input_denormal_used flag */
4436         goto soft;
4437     }
4438 
4439     if (isgreaterequal(ua.h, ub.h)) {
4440         if (isgreater(ua.h, ub.h)) {
4441             return float_relation_greater;
4442         }
4443         return float_relation_equal;
4444     }
4445     if (likely(isless(ua.h, ub.h))) {
4446         return float_relation_less;
4447     }
4448     /*
4449      * The only condition remaining is unordered.
4450      * Fall through to set flags.
4451      */
4452  soft:
4453     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4454 }
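
/*
 * Note on the fast path above (and in float64_hs_compare below): C99
 * requires isgreater()/isgreaterequal()/isless() not to raise the
 * invalid exception on quiet NaNs, so the only case left after those
 * tests is unordered, which drops to the softfloat path to raise the
 * right flags.
 */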
4455 
4456 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4457 {
4458     return float32_hs_compare(a, b, s, false);
4459 }
4460 
4461 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4462 {
4463     return float32_hs_compare(a, b, s, true);
4464 }
4465 
4466 static FloatRelation QEMU_SOFTFLOAT_ATTR
4467 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4468 {
4469     FloatParts64 pa, pb;
4470 
4471     float64_unpack_canonical(&pa, a, s);
4472     float64_unpack_canonical(&pb, b, s);
4473     return parts_compare(&pa, &pb, s, is_quiet);
4474 }
4475 
4476 static FloatRelation QEMU_FLATTEN
4477 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4478 {
4479     union_float64 ua, ub;
4480 
4481     ua.s = xa;
4482     ub.s = xb;
4483 
4484     if (QEMU_NO_HARDFLOAT) {
4485         goto soft;
4486     }
4487 
4488     if (unlikely(float64_is_denormal(ua.s) || float64_is_denormal(ub.s))) {
4489         /* We may need to set the input_denormal_used flag */
4490         goto soft;
4491     }
4492 
4493     if (isgreaterequal(ua.h, ub.h)) {
4494         if (isgreater(ua.h, ub.h)) {
4495             return float_relation_greater;
4496         }
4497         return float_relation_equal;
4498     }
4499     if (likely(isless(ua.h, ub.h))) {
4500         return float_relation_less;
4501     }
4502     /*
4503      * The only condition remaining is unordered.
4504      * Fall through to set flags.
4505      */
4506  soft:
4507     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4508 }
4509 
4510 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4511 {
4512     return float64_hs_compare(a, b, s, false);
4513 }
4514 
4515 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4516 {
4517     return float64_hs_compare(a, b, s, true);
4518 }
4519 
4520 static FloatRelation QEMU_FLATTEN
4521 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4522 {
4523     FloatParts64 pa, pb;
4524 
4525     bfloat16_unpack_canonical(&pa, a, s);
4526     bfloat16_unpack_canonical(&pb, b, s);
4527     return parts_compare(&pa, &pb, s, is_quiet);
4528 }
4529 
4530 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4531 {
4532     return bfloat16_do_compare(a, b, s, false);
4533 }
4534 
4535 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4536 {
4537     return bfloat16_do_compare(a, b, s, true);
4538 }
4539 
4540 static FloatRelation QEMU_FLATTEN
4541 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4542 {
4543     FloatParts128 pa, pb;
4544 
4545     float128_unpack_canonical(&pa, a, s);
4546     float128_unpack_canonical(&pb, b, s);
4547     return parts_compare(&pa, &pb, s, is_quiet);
4548 }
4549 
4550 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4551 {
4552     return float128_do_compare(a, b, s, false);
4553 }
4554 
4555 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4556 {
4557     return float128_do_compare(a, b, s, true);
4558 }
4559 
4560 static FloatRelation QEMU_FLATTEN
4561 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4562 {
4563     FloatParts128 pa, pb;
4564 
4565     if (!floatx80_unpack_canonical(&pa, a, s) ||
4566         !floatx80_unpack_canonical(&pb, b, s)) {
4567         return float_relation_unordered;
4568     }
4569     return parts_compare(&pa, &pb, s, is_quiet);
4570 }
4571 
4572 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4573 {
4574     return floatx80_do_compare(a, b, s, false);
4575 }
4576 
4577 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4578 {
4579     return floatx80_do_compare(a, b, s, true);
4580 }
4581 
4582 /*
4583  * Scale by 2**N
4584  */
4585 
4586 float16 float16_scalbn(float16 a, int n, float_status *status)
4587 {
4588     FloatParts64 p;
4589 
4590     float16_unpack_canonical(&p, a, status);
4591     parts_scalbn(&p, n, status);
4592     return float16_round_pack_canonical(&p, status);
4593 }
4594 
4595 float32 float32_scalbn(float32 a, int n, float_status *status)
4596 {
4597     FloatParts64 p;
4598 
4599     float32_unpack_canonical(&p, a, status);
4600     parts_scalbn(&p, n, status);
4601     return float32_round_pack_canonical(&p, status);
4602 }
4603 
4604 float64 float64_scalbn(float64 a, int n, float_status *status)
4605 {
4606     FloatParts64 p;
4607 
4608     float64_unpack_canonical(&p, a, status);
4609     parts_scalbn(&p, n, status);
4610     return float64_round_pack_canonical(&p, status);
4611 }
4612 
4613 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4614 {
4615     FloatParts64 p;
4616 
4617     bfloat16_unpack_canonical(&p, a, status);
4618     parts_scalbn(&p, n, status);
4619     return bfloat16_round_pack_canonical(&p, status);
4620 }
4621 
4622 float128 float128_scalbn(float128 a, int n, float_status *status)
4623 {
4624     FloatParts128 p;
4625 
4626     float128_unpack_canonical(&p, a, status);
4627     parts_scalbn(&p, n, status);
4628     return float128_round_pack_canonical(&p, status);
4629 }
4630 
4631 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4632 {
4633     FloatParts128 p;
4634 
4635     if (!floatx80_unpack_canonical(&p, a, status)) {
4636         return floatx80_default_nan(status);
4637     }
4638     parts_scalbn(&p, n, status);
4639     return floatx80_round_pack_canonical(&p, status);
4640 }
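
/*
 * Illustrative sketch (hypothetical helper): ldexp-style use.  Scaling
 * by 2**n leaves the significand unchanged, so for normal inputs flags
 * are only raised when the result overflows or becomes subnormal.
 */
static inline float64 example_ldexp64(float64 a, int n, float_status *s)
{
    return float64_scalbn(a, n, s);
}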
4641 
4642 /*
4643  * Square root
4644  */
4645 
4646 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4647 {
4648     FloatParts64 p;
4649 
4650     float16_unpack_canonical(&p, a, status);
4651     parts_sqrt(&p, status, &float16_params);
4652     return float16_round_pack_canonical(&p, status);
4653 }
4654 
4655 static float32 QEMU_SOFTFLOAT_ATTR
4656 soft_f32_sqrt(float32 a, float_status *status)
4657 {
4658     FloatParts64 p;
4659 
4660     float32_unpack_canonical(&p, a, status);
4661     parts_sqrt(&p, status, &float32_params);
4662     return float32_round_pack_canonical(&p, status);
4663 }
4664 
4665 static float64 QEMU_SOFTFLOAT_ATTR
4666 soft_f64_sqrt(float64 a, float_status *status)
4667 {
4668     FloatParts64 p;
4669 
4670     float64_unpack_canonical(&p, a, status);
4671     parts_sqrt(&p, status, &float64_params);
4672     return float64_round_pack_canonical(&p, status);
4673 }
4674 
4675 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4676 {
4677     union_float32 ua, ur;
4678 
4679     ua.s = xa;
4680     if (unlikely(!can_use_fpu(s))) {
4681         goto soft;
4682     }
4683 
4684     float32_input_flush1(&ua.s, s);
4685     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4686         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4687                        fpclassify(ua.h) == FP_ZERO) ||
4688                      signbit(ua.h))) {
4689             goto soft;
4690         }
4691     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4692                         float32_is_neg(ua.s))) {
4693         goto soft;
4694     }
4695     ur.h = sqrtf(ua.h);
4696     return ur.s;
4697 
4698  soft:
4699     return soft_f32_sqrt(ua.s, s);
4700 }
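
/*
 * Note: the fast path above is taken only for non-negative zero or
 * normal inputs, where the host's correctly-rounded sqrtf() agrees
 * with softfloat; NaN, infinity, negative and denormal inputs take
 * the soft path so that the right status flags are raised.
 */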
4701 
4702 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4703 {
4704     union_float64 ua, ur;
4705 
4706     ua.s = xa;
4707     if (unlikely(!can_use_fpu(s))) {
4708         goto soft;
4709     }
4710 
4711     float64_input_flush1(&ua.s, s);
4712     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4713         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4714                        fpclassify(ua.h) == FP_ZERO) ||
4715                      signbit(ua.h))) {
4716             goto soft;
4717         }
4718     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4719                         float64_is_neg(ua.s))) {
4720         goto soft;
4721     }
4722     ur.h = sqrt(ua.h);
4723     return ur.s;
4724 
4725  soft:
4726     return soft_f64_sqrt(ua.s, s);
4727 }
4728 
4729 float64 float64r32_sqrt(float64 a, float_status *status)
4730 {
4731     FloatParts64 p;
4732 
4733     float64_unpack_canonical(&p, a, status);
4734     parts_sqrt(&p, status, &float64_params);
4735     return float64r32_round_pack_canonical(&p, status);
4736 }
4737 
4738 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4739 {
4740     FloatParts64 p;
4741 
4742     bfloat16_unpack_canonical(&p, a, status);
4743     parts_sqrt(&p, status, &bfloat16_params);
4744     return bfloat16_round_pack_canonical(&p, status);
4745 }
4746 
4747 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4748 {
4749     FloatParts128 p;
4750 
4751     float128_unpack_canonical(&p, a, status);
4752     parts_sqrt(&p, status, &float128_params);
4753     return float128_round_pack_canonical(&p, status);
4754 }
4755 
4756 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4757 {
4758     FloatParts128 p;
4759 
4760     if (!floatx80_unpack_canonical(&p, a, s)) {
4761         return floatx80_default_nan(s);
4762     }
4763     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4764     return floatx80_round_pack_canonical(&p, s);
4765 }
4766 
4767 /*
4768  * log2
4769  */

4770 float32 float32_log2(float32 a, float_status *status)
4771 {
4772     FloatParts64 p;
4773 
4774     float32_unpack_canonical(&p, a, status);
4775     parts_log2(&p, status, &float32_params);
4776     return float32_round_pack_canonical(&p, status);
4777 }
4778 
4779 float64 float64_log2(float64 a, float_status *status)
4780 {
4781     FloatParts64 p;
4782 
4783     float64_unpack_canonical(&p, a, status);
4784     parts_log2(&p, status, &float64_params);
4785     return float64_round_pack_canonical(&p, status);
4786 }
4787 
4788 /*----------------------------------------------------------------------------
4789 | The pattern for a default generated NaN.
4790 *----------------------------------------------------------------------------*/
4791 
4792 float16 float16_default_nan(float_status *status)
4793 {
4794     FloatParts64 p;
4795 
4796     parts_default_nan(&p, status);
4797     p.frac >>= float16_params.frac_shift;
4798     return float16_pack_raw(&p);
4799 }
4800 
4801 float32 float32_default_nan(float_status *status)
4802 {
4803     FloatParts64 p;
4804 
4805     parts_default_nan(&p, status);
4806     p.frac >>= float32_params.frac_shift;
4807     return float32_pack_raw(&p);
4808 }
4809 
4810 float64 float64_default_nan(float_status *status)
4811 {
4812     FloatParts64 p;
4813 
4814     parts_default_nan(&p, status);
4815     p.frac >>= float64_params.frac_shift;
4816     return float64_pack_raw(&p);
4817 }
4818 
4819 float128 float128_default_nan(float_status *status)
4820 {
4821     FloatParts128 p;
4822 
4823     parts_default_nan(&p, status);
4824     frac_shr(&p, float128_params.frac_shift);
4825     return float128_pack_raw(&p);
4826 }
4827 
4828 bfloat16 bfloat16_default_nan(float_status *status)
4829 {
4830     FloatParts64 p;
4831 
4832     parts_default_nan(&p, status);
4833     p.frac >>= bfloat16_params.frac_shift;
4834     return bfloat16_pack_raw(&p);
4835 }
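
/*
 * Illustrative (target-dependent): for a target whose default NaN has
 * sign 0, the quiet bit set and a zero payload, the functions above
 * produce 0x7e00 (float16), 0x7fc00000 (float32) and
 * 0x7ff8000000000000 (float64).
 */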
4836 
4837 /*----------------------------------------------------------------------------
4838 | Returns a quiet NaN from a signalling NaN for the floating-point value `a'.
| The raw fraction is shifted up to the canonical position that
| parts_silence_nan() expects, then shifted back down before repacking.
4839 *----------------------------------------------------------------------------*/
4840 
4841 float16 float16_silence_nan(float16 a, float_status *status)
4842 {
4843     FloatParts64 p;
4844 
4845     float16_unpack_raw(&p, a);
4846     p.frac <<= float16_params.frac_shift;
4847     parts_silence_nan(&p, status);
4848     p.frac >>= float16_params.frac_shift;
4849     return float16_pack_raw(&p);
4850 }
4851 
4852 float32 float32_silence_nan(float32 a, float_status *status)
4853 {
4854     FloatParts64 p;
4855 
4856     float32_unpack_raw(&p, a);
4857     p.frac <<= float32_params.frac_shift;
4858     parts_silence_nan(&p, status);
4859     p.frac >>= float32_params.frac_shift;
4860     return float32_pack_raw(&p);
4861 }
4862 
4863 float64 float64_silence_nan(float64 a, float_status *status)
4864 {
4865     FloatParts64 p;
4866 
4867     float64_unpack_raw(&p, a);
4868     p.frac <<= float64_params.frac_shift;
4869     parts_silence_nan(&p, status);
4870     p.frac >>= float64_params.frac_shift;
4871     return float64_pack_raw(&p);
4872 }
4873 
4874 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4875 {
4876     FloatParts64 p;
4877 
4878     bfloat16_unpack_raw(&p, a);
4879     p.frac <<= bfloat16_params.frac_shift;
4880     parts_silence_nan(&p, status);
4881     p.frac >>= bfloat16_params.frac_shift;
4882     return bfloat16_pack_raw(&p);
4883 }
4884 
4885 float128 float128_silence_nan(float128 a, float_status *status)
4886 {
4887     FloatParts128 p;
4888 
4889     float128_unpack_raw(&p, a);
4890     frac_shl(&p, float128_params.frac_shift);
4891     parts_silence_nan(&p, status);
4892     frac_shr(&p, float128_params.frac_shift);
4893     return float128_pack_raw(&p);
4894 }
4895 
4896 /*----------------------------------------------------------------------------
4897 | If `a' is denormal and we are in flush-to-zero mode then set the
4898 | input-denormal exception and return zero. Otherwise just return the value.
4899 *----------------------------------------------------------------------------*/
4900 
4901 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4902 {
4903     if (p.exp == 0 && p.frac != 0) {
4904         float_raise(float_flag_input_denormal_flushed, status);
4905         return true;
4906     }
4907 
4908     return false;
4909 }
4910 
4911 float16 float16_squash_input_denormal(float16 a, float_status *status)
4912 {
4913     if (status->flush_inputs_to_zero) {
4914         FloatParts64 p;
4915 
4916         float16_unpack_raw(&p, a);
4917         if (parts_squash_denormal(p, status)) {
4918             return float16_set_sign(float16_zero, p.sign);
4919         }
4920     }
4921     return a;
4922 }
4923 
4924 float32 float32_squash_input_denormal(float32 a, float_status *status)
4925 {
4926     if (status->flush_inputs_to_zero) {
4927         FloatParts64 p;
4928 
4929         float32_unpack_raw(&p, a);
4930         if (parts_squash_denormal(p, status)) {
4931             return float32_set_sign(float32_zero, p.sign);
4932         }
4933     }
4934     return a;
4935 }
4936 
4937 float64 float64_squash_input_denormal(float64 a, float_status *status)
4938 {
4939     if (status->flush_inputs_to_zero) {
4940         FloatParts64 p;
4941 
4942         float64_unpack_raw(&p, a);
4943         if (parts_squash_denormal(p, status)) {
4944             return float64_set_sign(float64_zero, p.sign);
4945         }
4946     }
4947     return a;
4948 }
4949 
4950 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4951 {
4952     if (status->flush_inputs_to_zero) {
4953         FloatParts64 p;
4954 
4955         bfloat16_unpack_raw(&p, a);
4956         if (parts_squash_denormal(p, status)) {
4957             return bfloat16_set_sign(bfloat16_zero, p.sign);
4958         }
4959     }
4960     return a;
4961 }
4962 
4963 /*----------------------------------------------------------------------------
4964 | Normalizes the subnormal extended double-precision floating-point value
4965 | represented by the denormalized significand `aSig'.  The normalized exponent
4966 | and significand are stored at the locations pointed to by `zExpPtr' and
4967 | `zSigPtr', respectively.
4968 *----------------------------------------------------------------------------*/
4969 
4970 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4971                                 uint64_t *zSigPtr)
4972 {
4973     int8_t shiftCount;
4974 
4975     shiftCount = clz64(aSig);
4976     *zSigPtr = aSig<<shiftCount;
4977     *zExpPtr = 1 - shiftCount;
4978 }
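
/*
 * Worked example (illustrative): aSig = 1 has clz64(aSig) = 63, so
 * *zSigPtr = UINT64_C(1) << 63 = 0x8000000000000000 and
 * *zExpPtr = 1 - 63 = -62.
 */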
4979 
4980 /*----------------------------------------------------------------------------
4981 | Takes two extended double-precision floating-point values `a' and `b', one
4982 | of which is a NaN, and returns the appropriate NaN result.  If either `a' or
4983 | `b' is a signaling NaN, the invalid exception is raised.
4984 *----------------------------------------------------------------------------*/
4985 
4986 floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status)
4987 {
4988     FloatParts128 pa, pb, *pr;
4989 
4990     if (!floatx80_unpack_canonical(&pa, a, status) ||
4991         !floatx80_unpack_canonical(&pb, b, status)) {
4992         return floatx80_default_nan(status);
4993     }
4994 
4995     pr = parts_pick_nan(&pa, &pb, status);
4996     return floatx80_round_pack_canonical(pr, status);
4997 }
4998 
4999 /*----------------------------------------------------------------------------
5000 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
5001 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
5002 | and returns the proper extended double-precision floating-point value
5003 | corresponding to the abstract input.  Ordinarily, the abstract value is
5004 | rounded and packed into the extended double-precision format, with the
5005 | inexact exception raised if the abstract input cannot be represented
5006 | exactly.  However, if the abstract value is too large, the overflow and
5007 | inexact exceptions are raised and an infinity or maximal finite value is
5008 | returned.  If the abstract value is too small, the input value is rounded to
5009 | a subnormal number, and the underflow and inexact exceptions are raised if
5010 | the abstract input cannot be represented exactly as a subnormal extended
5011 | double-precision floating-point number.
5012 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
5013 | the result is rounded to the same number of bits as single or double
5014 | precision, respectively.  Otherwise, the result is rounded to the full
5015 | precision of the extended double-precision format.
5016 |     The input significand must be normalized or smaller.  If the input
5017 | significand is not normalized, `zExp' must be 0; in that case, the result
5018 | returned is a subnormal number, and it must not require rounding.  The
5019 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
5020 | Floating-Point Arithmetic.
5021 *----------------------------------------------------------------------------*/
5022 
5023 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
5024                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
5025                               float_status *status)
5026 {
5027     FloatRoundMode roundingMode;
5028     bool roundNearestEven, increment, isTiny;
5029     int64_t roundIncrement, roundMask, roundBits;
5030 
5031     roundingMode = status->float_rounding_mode;
5032     roundNearestEven = ( roundingMode == float_round_nearest_even );
5033     switch (roundingPrecision) {
5034     case floatx80_precision_x:
5035         goto precision80;
5036     case floatx80_precision_d:
5037         roundIncrement = UINT64_C(0x0000000000000400);
5038         roundMask = UINT64_C(0x00000000000007FF);
5039         break;
5040     case floatx80_precision_s:
5041         roundIncrement = UINT64_C(0x0000008000000000);
5042         roundMask = UINT64_C(0x000000FFFFFFFFFF);
5043         break;
5044     default:
5045         g_assert_not_reached();
5046     }
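    /*
     * roundMask covers the significand bits beyond the destination
     * precision (64 - 53 = 11 bits for double, 64 - 24 = 40 for
     * single); roundIncrement is half an ulp at that precision.
     */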
5047     zSig0 |= ( zSig1 != 0 );
5048     switch (roundingMode) {
5049     case float_round_nearest_even:
5050     case float_round_ties_away:
5051         break;
5052     case float_round_to_zero:
5053         roundIncrement = 0;
5054         break;
5055     case float_round_up:
5056         roundIncrement = zSign ? 0 : roundMask;
5057         break;
5058     case float_round_down:
5059         roundIncrement = zSign ? roundMask : 0;
5060         break;
5061     default:
5062         abort();
5063     }
5064     roundBits = zSig0 & roundMask;
5065     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
5066         if (    ( 0x7FFE < zExp )
5067              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
5068            ) {
5069             goto overflow;
5070         }
5071         if ( zExp <= 0 ) {
5072             if (status->flush_to_zero) {
5073                 float_raise(float_flag_output_denormal_flushed, status);
5074                 return packFloatx80(zSign, 0, 0);
5075             }
5076             isTiny = status->tininess_before_rounding
5077                   || (zExp < 0 )
5078                   || (zSig0 <= zSig0 + roundIncrement);
5079             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
5080             zExp = 0;
5081             roundBits = zSig0 & roundMask;
5082             if (isTiny && roundBits) {
5083                 float_raise(float_flag_underflow, status);
5084             }
5085             if (roundBits) {
5086                 float_raise(float_flag_inexact, status);
5087             }
5088             zSig0 += roundIncrement;
5089             if ( (int64_t) zSig0 < 0 ) zExp = 1;
5090             roundIncrement = roundMask + 1;
5091             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5092                 roundMask |= roundIncrement;
5093             }
5094             zSig0 &= ~ roundMask;
5095             return packFloatx80( zSign, zExp, zSig0 );
5096         }
5097     }
5098     if (roundBits) {
5099         float_raise(float_flag_inexact, status);
5100     }
5101     zSig0 += roundIncrement;
5102     if ( zSig0 < roundIncrement ) {
5103         ++zExp;
5104         zSig0 = UINT64_C(0x8000000000000000);
5105     }
5106     roundIncrement = roundMask + 1;
5107     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5108         roundMask |= roundIncrement;
5109     }
5110     zSig0 &= ~ roundMask;
5111     if ( zSig0 == 0 ) zExp = 0;
5112     return packFloatx80( zSign, zExp, zSig0 );
5113  precision80:
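    /*
     * Editorial note: in the full 80-bit case all 64 bits of zSig0 are
     * kept; zSig1 holds the round and sticky information, and "increment"
     * records whether zSig0 must be rounded up.
     */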
5114     switch (roundingMode) {
5115     case float_round_nearest_even:
5116     case float_round_ties_away:
5117         increment = ((int64_t)zSig1 < 0);
5118         break;
5119     case float_round_to_zero:
5120         increment = 0;
5121         break;
5122     case float_round_up:
5123         increment = !zSign && zSig1;
5124         break;
5125     case float_round_down:
5126         increment = zSign && zSig1;
5127         break;
5128     default:
5129         abort();
5130     }
5131     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
5132         if (    ( 0x7FFE < zExp )
5133              || (    ( zExp == 0x7FFE )
5134                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
5135                   && increment
5136                 )
5137            ) {
5138             roundMask = 0;
5139  overflow:
5140             float_raise(float_flag_overflow | float_flag_inexact, status);
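            /*
             * Editorial note: modes that never round away from zero in
             * this sign direction return the largest finite value at the
             * current precision (~roundMask) instead of infinity.
             */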
5141             if (    ( roundingMode == float_round_to_zero )
5142                  || ( zSign && ( roundingMode == float_round_up ) )
5143                  || ( ! zSign && ( roundingMode == float_round_down ) )
5144                ) {
5145                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
5146             }
5147             return packFloatx80(zSign,
5148                                 floatx80_infinity_high,
5149                                 floatx80_infinity_low);
5150         }
5151         if ( zExp <= 0 ) {
5152             isTiny = status->tininess_before_rounding
5153                   || (zExp < 0)
5154                   || !increment
5155                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
5156             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
5157             zExp = 0;
5158             if (isTiny && zSig1) {
5159                 float_raise(float_flag_underflow, status);
5160             }
5161             if (zSig1) {
5162                 float_raise(float_flag_inexact, status);
5163             }
5164             switch (roundingMode) {
5165             case float_round_nearest_even:
5166             case float_round_ties_away:
5167                 increment = ((int64_t)zSig1 < 0);
5168                 break;
5169             case float_round_to_zero:
5170                 increment = 0;
5171                 break;
5172             case float_round_up:
5173                 increment = !zSign && zSig1;
5174                 break;
5175             case float_round_down:
5176                 increment = zSign && zSig1;
5177                 break;
5178             default:
5179                 abort();
5180             }
5181             if ( increment ) {
5182                 ++zSig0;
5183                 if (!(zSig1 << 1) && roundNearestEven) {
5184                     zSig0 &= ~1;
5185                 }
5186                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
5187             }
5188             return packFloatx80( zSign, zExp, zSig0 );
5189         }
5190     }
5191     if (zSig1) {
5192         float_raise(float_flag_inexact, status);
5193     }
5194     if ( increment ) {
5195         ++zSig0;
5196         if ( zSig0 == 0 ) {
5197             ++zExp;
5198             zSig0 = UINT64_C(0x8000000000000000);
5199         }
5200         else {
5201             if (!(zSig1 << 1) && roundNearestEven) {
5202                 zSig0 &= ~1;
5203             }
5204         }
5205     }
5206     else {
5207         if ( zSig0 == 0 ) zExp = 0;
5208     }
5209     return packFloatx80( zSign, zExp, zSig0 );
5211 }
5212 
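/*
 * Editorial sketch (not part of the original SoftFloat code; the helper
 * name is hypothetical): rounding to floatx80_precision_d keeps the top
 * 53 significand bits, mimicking an x87 whose precision-control field is
 * set to "double".  For example, sig0 = 0x8000000000000400 with sig1 = 0
 * is an exact tie, and nearest-even rounding returns it to
 * 0x8000000000000000 because bit 11, the lowest kept bit, is already 0.
 */
static inline floatx80 round_sig128_to_double_prec(bool sign, int32_t exp,
                                                   uint64_t sig0,
                                                   uint64_t sig1,
                                                   float_status *s)
{
    return roundAndPackFloatx80(floatx80_precision_d, sign, exp,
                                sig0, sig1, s);
}
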
5213 /*----------------------------------------------------------------------------
5214 | Takes an abstract floating-point value having sign `zSign', exponent
5215 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5216 | and returns the proper extended double-precision floating-point value
5217 | corresponding to the abstract input.  This routine is just like
5218 | `roundAndPackFloatx80' except that the input significand does not have to be
5219 | normalized.
5220 *----------------------------------------------------------------------------*/
5221 
5222 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5223                                        bool zSign, int32_t zExp,
5224                                        uint64_t zSig0, uint64_t zSig1,
5225                                        float_status *status)
5226 {
5227     int8_t shiftCount;
5228 
5229     if ( zSig0 == 0 ) {
5230         zSig0 = zSig1;
5231         zSig1 = 0;
5232         zExp -= 64;
5233     }
5234     shiftCount = clz64(zSig0);
5235     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5236     zExp -= shiftCount;
5237     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5238                                 zSig0, zSig1, status);
5240 }
5241 
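/*
 * Editorial sketch (mirrors how classic SoftFloat releases implemented
 * int64_to_floatx80; the function name here is hypothetical): the
 * unnormalized significand is simply |a|, and the exponent 0x403E =
 * 0x3FFF (the extended-precision bias) + 63 places the radix point just
 * after bit 63.
 */
static floatx80 example_int64_to_floatx80(int64_t a, float_status *s)
{
    bool sign = a < 0;
    uint64_t abs_a = sign ? -(uint64_t)a : (uint64_t)a;

    if (abs_a == 0) {
        return packFloatx80(0, 0, 0);
    }
    return normalizeRoundAndPackFloatx80(floatx80_precision_x, sign,
                                         0x403E, abs_a, 0, s);
}
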
5242 /*----------------------------------------------------------------------------
5243 | Returns the binary exponential of the single-precision floating-point value
5244 | `a'. The operation is performed according to the IEC/IEEE Standard for
5245 | Binary Floating-Point Arithmetic.
5246 |
5247 | Uses the following identities:
5248 |
5249 | 1. -------------------------------------------------------------------------
5250 |      x    x*ln(2)
5251 |     2  = e
5252 |
5253 | 2. -------------------------------------------------------------------------
5254 |                      2     3     4     5           n
5255 |      x        x     x     x     x     x           x
5256 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5257 |               1!    2!    3!    4!    5!          n!
5258 *----------------------------------------------------------------------------*/
5259 
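/*
 * Coefficient i below is 1/(i+1)! rounded to double precision; together
 * with the loop in float32_exp2() below, which starts from rp = 1 and
 * raises xnp to successive powers of x, this evaluates the first sixteen
 * terms of the series above.
 */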
5260 static const float64 float32_exp2_coefficients[15] =
5261 {
5262     const_float64( 0x3ff0000000000000ll ), /*  1 */
5263     const_float64( 0x3fe0000000000000ll ), /*  2 */
5264     const_float64( 0x3fc5555555555555ll ), /*  3 */
5265     const_float64( 0x3fa5555555555555ll ), /*  4 */
5266     const_float64( 0x3f81111111111111ll ), /*  5 */
5267     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5268     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5269     const_float64( 0x3efa01a01a01a01all ), /*  8 */
5270     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5271     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5272     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5273     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5274     const_float64( 0x3de6124613a86d09ll ), /* 13 */
5275     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5276     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5277 };
5278 
5279 float32 float32_exp2(float32 a, float_status *status)
5280 {
5281     FloatParts64 xp, xnp, tp, rp;
5282     int i;
5283 
5284     float32_unpack_canonical(&xp, a, status);
5285     if (unlikely(xp.cls != float_class_normal)) {
5286         switch (xp.cls) {
5287         case float_class_denormal:
5288             break;
5289         case float_class_snan:
5290         case float_class_qnan:
5291             parts_return_nan(&xp, status);
5292             return float32_round_pack_canonical(&xp, status);
5293         case float_class_inf:
5294             return xp.sign ? float32_zero : a;
5295         case float_class_zero:
5296             return float32_one;
5297         default:
5298             g_assert_not_reached();
5299         }
5300     }
5301 
5302     float_raise(float_flag_inexact, status);
5303 
5304     float64_unpack_canonical(&tp, float64_ln2, status);
5305     xp = *parts_mul(&xp, &tp, status);
5306     xnp = xp;
5307 
5308     float64_unpack_canonical(&rp, float64_one, status);
5309     for (i = 0; i < 15; i++) {
5311         float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
5312         rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status);
5313         xnp = *parts_mul(&xnp, &xp, status);
5314     }
5315 
5316     return float32_round_pack_canonical(&rp, status);
5317 }
5318 
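/*
 * Editorial sketch of the same evaluation in plain host arithmetic, for
 * exposition only: the real code above must go through softfloat so that
 * rounding and exception flags follow the guest, not the host.
 */
static double example_exp2(double x)
{
    double xl = x * 0.6931471805599453;  /* identity 1: 2^x = e^(x*ln2) */
    double term = 1.0, r = 1.0;
    int n;

    for (n = 1; n <= 15; n++) {          /* identity 2: truncated series */
        term *= xl / n;                  /* term = (x*ln2)^n / n! */
        r += term;
    }
    return r;
}
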
5319 /*----------------------------------------------------------------------------
5320 | Rounds the extended double-precision floating-point value `a' to the
5321 | precision given by the floatx80_rounding_precision field of `status',
5322 | and returns the result in extended double-precision format.
5323 | The operation is performed according to the IEC/IEEE Standard for Binary
5324 | Floating-Point Arithmetic.
5325 *----------------------------------------------------------------------------*/
5326 
5327 floatx80 floatx80_round(floatx80 a, float_status *status)
5328 {
5329     FloatParts128 p;
5330 
5331     if (!floatx80_unpack_canonical(&p, a, status)) {
5332         return floatx80_default_nan(status);
5333     }
5334     return floatx80_round_pack_canonical(&p, status);
5335 }
5336 
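/*
 * Editorial usage sketch (hypothetical helper): emulating x87 precision
 * control by narrowing a result to single precision while keeping the
 * extended-precision format.
 */
static floatx80 example_round_to_single_prec(floatx80 a, float_status *s)
{
    FloatX80RoundPrec old_prec = s->floatx80_rounding_precision;
    floatx80 r;

    s->floatx80_rounding_precision = floatx80_precision_s;
    r = floatx80_round(a, s);
    s->floatx80_rounding_precision = old_prec;
    return r;
}
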
5337 static void __attribute__((constructor)) softfloat_init(void)
5338 {
5339     union_float64 ua, ub, uc, ur;
5340 
5341     if (QEMU_NO_HARDFLOAT) {
5342         return;
5343     }
5344     /*
5345      * Test that the host's FMA is not obviously broken. For example,
5346      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5347      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5348      */
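    /*
     * Editorial note on the test values: ua = 2^-1021 * (1 + 2^-52),
     * ub = 2^-53, uc = 2^-1021.  The exact ua * ub + uc equals
     * 2^-1021 * (1 + 2^-53 + 2^-105); a correctly fused multiply-add
     * rounds this up to ua, while a split multiply-then-add loses the
     * 2^-105 sticky bit and its resulting tie rounds to even, yielding
     * uc instead.
     */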
5349     ua.s = 0x0020000000000001ULL;
5350     ub.s = 0x3ca0000000000000ULL;
5351     uc.s = 0x0020000000000000ULL;
5352     ur.h = fma(ua.h, ub.h, uc.h);
5353     if (ur.s != 0x0020000000000001ULL) {
5354         force_soft_fma = true;
5355     }
5356 }
5357