xref: /qemu/fpu/softfloat.c (revision c5d4173fcf04f6de9b9bb0959d1fdfc08254381a)
1 /*
2  * QEMU float support
3  *
4  * The code in this source file is derived from release 2a of the SoftFloat
5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6  * some later contributions) are provided under that license, as detailed below.
7  * It has subsequently been modified by contributors to the QEMU Project,
8  * so some portions are provided under:
9  *  the SoftFloat-2a license
10  *  the BSD license
11  *  GPL-v2-or-later
12  *
13  * Any future contributions to this file after December 1st 2014 will be
14  * taken to be licensed under the SoftFloat-2a license unless specifically
15  * indicated otherwise.
16  */
17 
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22 
23 Written by John R. Hauser.  This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704.  Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980.  The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32 
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43 
44 ===============================================================================
45 */
46 
47 /* BSD licensing:
48  * Copyright (c) 2006, Fabrice Bellard
49  * All rights reserved.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions are met:
53  *
54  * 1. Redistributions of source code must retain the above copyright notice,
55  * this list of conditions and the following disclaimer.
56  *
57  * 2. Redistributions in binary form must reproduce the above copyright notice,
58  * this list of conditions and the following disclaimer in the documentation
59  * and/or other materials provided with the distribution.
60  *
61  * 3. Neither the name of the copyright holder nor the names of its contributors
62  * may be used to endorse or promote products derived from this software without
63  * specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75  * THE POSSIBILITY OF SUCH DAMAGE.
76  */
77 
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79  * version 2 or later. See the COPYING file in the top-level directory.
80  */
81 
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83  * target-dependent and needs the TARGET_* macros.
84  */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89 
90 /* We only need stdlib for abort() */
91 
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations.  (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98 
99 /*
100  * Hardfloat
101  *
102  * Fast emulation of guest FP instructions is challenging for two reasons.
103  * First, FP instruction semantics are similar but not identical, particularly
104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
105  * exception flags is not trivial: reading the host's flags register with a
106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107  * and trapping on every FP exception is neither fast nor pleasant to work with.
108  *
109  * We address these challenges by leveraging the host FPU for a subset of the
110  * operations. To do this we expand on the idea presented in this paper:
111  *
112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114  *
115  * The idea is thus to leverage the host FPU to (1) compute FP operations
116  * and (2) identify whether FP exceptions occurred while avoiding
117  * expensive exception flag register accesses.
118  *
119  * An important optimization shown in the paper is that given that exception
120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121  * This is particularly useful for the inexact flag, which is very frequently
122  * raised in floating-point workloads.
123  *
124  * We optimize the code further by deferring to soft-fp whenever FP exception
125  * detection might get hairy. Two examples: (1) when at least one operand is
126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127  * and the result is < the minimum normal.
128  */
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
130     static inline void name(soft_t *a, float_status *s)                 \
131     {                                                                   \
132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
134                                      soft_t ## _is_neg(*a));            \
135             float_raise(float_flag_input_denormal_flushed, s);          \
136         }                                                               \
137     }
138 
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141 #undef GEN_INPUT_FLUSH__NOCHECK
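
/*
 * For reference, the generator above expands for float32 to the following
 * (a hand-expanded illustration only, not a second definition):
 *
 *   static inline void float32_input_flush__nocheck(float32 *a, float_status *s)
 *   {
 *       if (unlikely(float32_is_denormal(*a))) {
 *           *a = float32_set_sign(float32_zero, float32_is_neg(*a));
 *           float_raise(float_flag_input_denormal_flushed, s);
 *       }
 *   }
 */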
142 
143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
144     static inline void name(soft_t *a, float_status *s) \
145     {                                                   \
146         if (likely(!s->flush_inputs_to_zero)) {         \
147             return;                                     \
148         }                                               \
149         soft_t ## _input_flush__nocheck(a, s);          \
150     }
151 
152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154 #undef GEN_INPUT_FLUSH1
155 
156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
158     {                                                                   \
159         if (likely(!s->flush_inputs_to_zero)) {                         \
160             return;                                                     \
161         }                                                               \
162         soft_t ## _input_flush__nocheck(a, s);                          \
163         soft_t ## _input_flush__nocheck(b, s);                          \
164     }
165 
166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168 #undef GEN_INPUT_FLUSH2
169 
170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172     {                                                                   \
173         if (likely(!s->flush_inputs_to_zero)) {                         \
174             return;                                                     \
175         }                                                               \
176         soft_t ## _input_flush__nocheck(a, s);                          \
177         soft_t ## _input_flush__nocheck(b, s);                          \
178         soft_t ## _input_flush__nocheck(c, s);                          \
179     }
180 
181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183 #undef GEN_INPUT_FLUSH3
184 
185 /*
186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
187  * hardfloat functions. Each combination of number of inputs and float size
188  * gets its own value.
189  */
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
197 #else
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
204 #endif
205 
206 /*
207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208  * float{32,64}_is_infinity when !USE_FP.
209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211  */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF   1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF   0
216 #endif
217 
218 /*
219  * Some targets clear the FP flags before most FP operations. This prevents
220  * the use of hardfloat, since hardfloat relies on the inexact flag being
221  * already set.
222  */
223 #if defined(__FAST_MATH__)
224 # warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
225     IEEE implementation
226 # define QEMU_NO_HARDFLOAT 1
227 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
228 #else
229 # define QEMU_NO_HARDFLOAT 0
230 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
231 #endif
232 
233 static inline bool can_use_fpu(const float_status *s)
234 {
235     if (QEMU_NO_HARDFLOAT) {
236         return false;
237     }
238     return likely(s->float_exception_flags & float_flag_inexact &&
239                   s->float_rounding_mode == float_round_nearest_even);
240 }
241 
242 /*
243  * Hardfloat generation functions. Each operation can have two flavors:
244  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
245  * most condition checks, or native ones (e.g. fpclassify).
246  *
247  * The flavor is chosen by the callers. Instead of using macros, we rely on the
248  * compiler to propagate constants and inline everything into the callers.
249  *
250  * We only generate functions for operations with two inputs, since only
251  * these are common enough to justify consolidating them into common code.
252  */
253 
254 typedef union {
255     float32 s;
256     float h;
257 } union_float32;
258 
259 typedef union {
260     float64 s;
261     double h;
262 } union_float64;
263 
264 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
265 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
266 
267 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
268 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
269 typedef float   (*hard_f32_op2_fn)(float a, float b);
270 typedef double  (*hard_f64_op2_fn)(double a, double b);
271 
272 /* 2-input is-zero-or-normal */
273 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
274 {
275     if (QEMU_HARDFLOAT_2F32_USE_FP) {
276         /*
277          * Not using a temp variable for consecutive fpclassify calls ends up
278          * generating faster code.
279          */
280         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
281                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
282     }
283     return float32_is_zero_or_normal(a.s) &&
284            float32_is_zero_or_normal(b.s);
285 }
286 
287 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
288 {
289     if (QEMU_HARDFLOAT_2F64_USE_FP) {
290         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
291                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
292     }
293     return float64_is_zero_or_normal(a.s) &&
294            float64_is_zero_or_normal(b.s);
295 }
296 
297 /* 3-input is-zero-or-normal */
298 static inline
299 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
300 {
301     if (QEMU_HARDFLOAT_3F32_USE_FP) {
302         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
303                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
304                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
305     }
306     return float32_is_zero_or_normal(a.s) &&
307            float32_is_zero_or_normal(b.s) &&
308            float32_is_zero_or_normal(c.s);
309 }
310 
311 static inline
312 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
313 {
314     if (QEMU_HARDFLOAT_3F64_USE_FP) {
315         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
316                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
317                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
318     }
319     return float64_is_zero_or_normal(a.s) &&
320            float64_is_zero_or_normal(b.s) &&
321            float64_is_zero_or_normal(c.s);
322 }
323 
324 static inline bool f32_is_inf(union_float32 a)
325 {
326     if (QEMU_HARDFLOAT_USE_ISINF) {
327         return isinf(a.h);
328     }
329     return float32_is_infinity(a.s);
330 }
331 
332 static inline bool f64_is_inf(union_float64 a)
333 {
334     if (QEMU_HARDFLOAT_USE_ISINF) {
335         return isinf(a.h);
336     }
337     return float64_is_infinity(a.s);
338 }
339 
340 static inline float32
341 float32_gen2(float32 xa, float32 xb, float_status *s,
342              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
343              f32_check_fn pre, f32_check_fn post)
344 {
345     union_float32 ua, ub, ur;
346 
347     ua.s = xa;
348     ub.s = xb;
349 
350     if (unlikely(!can_use_fpu(s))) {
351         goto soft;
352     }
353 
354     float32_input_flush2(&ua.s, &ub.s, s);
355     if (unlikely(!pre(ua, ub))) {
356         goto soft;
357     }
358 
359     ur.h = hard(ua.h, ub.h);
360     if (unlikely(f32_is_inf(ur))) {
361         float_raise(float_flag_overflow, s);
362     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
363         goto soft;
364     }
365     return ur.s;
366 
367  soft:
368     return soft(ua.s, ub.s, s);
369 }
370 
371 static inline float64
372 float64_gen2(float64 xa, float64 xb, float_status *s,
373              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
374              f64_check_fn pre, f64_check_fn post)
375 {
376     union_float64 ua, ub, ur;
377 
378     ua.s = xa;
379     ub.s = xb;
380 
381     if (unlikely(!can_use_fpu(s))) {
382         goto soft;
383     }
384 
385     float64_input_flush2(&ua.s, &ub.s, s);
386     if (unlikely(!pre(ua, ub))) {
387         goto soft;
388     }
389 
390     ur.h = hard(ua.h, ub.h);
391     if (unlikely(f64_is_inf(ur))) {
392         float_raise(float_flag_overflow, s);
393     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
394         goto soft;
395     }
396     return ur.s;
397 
398  soft:
399     return soft(ua.s, ub.s, s);
400 }
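
/*
 * A minimal sketch of how a caller wires up the generators above.  The
 * helper names soft_f32_add and f32_addsubmul_post are illustrative here,
 * standing in for the soft-float fallback and post-condition check that
 * the real per-operation wrappers supply later in this file:
 *
 *   static float hard_f32_add(float a, float b)
 *   {
 *       return a + b;
 *   }
 *
 *   float32 float32_add(float32 a, float32 b, float_status *s)
 *   {
 *       return float32_gen2(a, b, s, hard_f32_add, soft_f32_add,
 *                           f32_is_zon2, f32_addsubmul_post);
 *   }
 *
 * The "pre" check keeps denormal/inf/NaN inputs away from the host FPU;
 * the "post" check sends tiny results back to soft-float so that rounding
 * and underflow flags are computed exactly.
 */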
401 
402 /*
403  * Classify a floating point number. Everything above float_class_qnan
404  * is a NaN so cls >= float_class_qnan is any NaN.
405  *
406  * Note that we canonicalize denormals, so most code should treat
407  * class_normal and class_denormal identically.
408  */
409 
410 typedef enum __attribute__ ((__packed__)) {
411     float_class_unclassified,
412     float_class_zero,
413     float_class_normal,
414     float_class_denormal, /* input was a non-squashed denormal */
415     float_class_inf,
416     float_class_qnan,  /* all NaNs from here */
417     float_class_snan,
418 } FloatClass;
419 
420 #define float_cmask(bit)  (1u << (bit))
421 
422 enum {
423     float_cmask_zero    = float_cmask(float_class_zero),
424     float_cmask_normal  = float_cmask(float_class_normal),
425     float_cmask_denormal = float_cmask(float_class_denormal),
426     float_cmask_inf     = float_cmask(float_class_inf),
427     float_cmask_qnan    = float_cmask(float_class_qnan),
428     float_cmask_snan    = float_cmask(float_class_snan),
429 
430     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
431     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
432     float_cmask_anynorm = float_cmask_normal | float_cmask_denormal,
433 };
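
/*
 * Typical use of these masks, as in the pick_nan_muladd helpers declared
 * below (a small illustrative sketch):
 *
 *   int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 *
 *   if (unlikely(ab_mask & float_cmask_anynan)) {
 *       ... at least one of A and B is a NaN ...
 *   } else if (ab_mask == float_cmask_infzero) {
 *       ... one operand is infinity and the other is zero ...
 *   }
 */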
434 
435 /* Flags for parts_minmax. */
436 enum {
437     /* Set for minimum; clear for maximum. */
438     minmax_ismin = 1,
439     /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
440     minmax_isnum = 2,
441     /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
442     minmax_ismag = 4,
443     /*
444      * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
445      * operations.
446      */
447     minmax_isnumber = 8,
448 };
449 
450 /* Simple helpers for checking if, or what kind of, NaN we have */
451 static inline __attribute__((unused)) bool is_nan(FloatClass c)
452 {
453     return unlikely(c >= float_class_qnan);
454 }
455 
456 static inline __attribute__((unused)) bool is_snan(FloatClass c)
457 {
458     return c == float_class_snan;
459 }
460 
461 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
462 {
463     return c == float_class_qnan;
464 }
465 
466 /*
467  * Return true if the float_cmask has only normals in it
468  * (including input denormals that were canonicalized)
469  */
470 static inline bool cmask_is_only_normals(int cmask)
471 {
472     return !(cmask & ~float_cmask_anynorm);
473 }
474 
475 static inline bool is_anynorm(FloatClass c)
476 {
477     return float_cmask(c) & float_cmask_anynorm;
478 }
479 
480 /*
481  * Structure holding all of the decomposed parts of a float.
482  * The exponent is unbiased and the fraction is normalized.
483  *
484  * The fraction words are stored in big-endian word ordering,
485  * so that truncation from a larger format to a smaller format
486  * can be done simply by ignoring subsequent elements.
487  */
488 
489 typedef struct {
490     FloatClass cls;
491     bool sign;
492     int32_t exp;
493     union {
494         /* Routines that know the structure may reference the singular name. */
495         uint64_t frac;
496         /*
497          * Routines expanded with multiple structures reference "hi" and "lo"
498          * depending on the operation.  In FloatParts64, "hi" and "lo" are
499          * both the same word and aliased here.
500          */
501         uint64_t frac_hi;
502         uint64_t frac_lo;
503     };
504 } FloatParts64;
505 
506 typedef struct {
507     FloatClass cls;
508     bool sign;
509     int32_t exp;
510     uint64_t frac_hi;
511     uint64_t frac_lo;
512 } FloatParts128;
513 
514 typedef struct {
515     FloatClass cls;
516     bool sign;
517     int32_t exp;
518     uint64_t frac_hi;
519     uint64_t frac_hm;  /* high-middle */
520     uint64_t frac_lm;  /* low-middle */
521     uint64_t frac_lo;
522 } FloatParts256;
523 
524 /* These apply to the most significant word of each FloatPartsN. */
525 #define DECOMPOSED_BINARY_POINT    63
526 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
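
/*
 * Example: 1.0 in this decomposed form is
 *   cls = float_class_normal, sign = false, exp = 0,
 *   frac = DECOMPOSED_IMPLICIT_BIT (0x8000000000000000)
 * since the exponent is unbiased and the fraction is kept normalized with
 * the (now explicit) integer bit at bit 63.
 */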
527 
528 /* Structure holding all of the relevant parameters for a format.
529  *   exp_size: the size of the exponent field
530  *   exp_bias: the offset applied to the exponent field
531  *   exp_max: the maximum normalised exponent
532  *   frac_size: the size of the fraction field
533  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
534  * The following are computed based on the size of the fraction
535  *   round_mask: bits below lsb which must be rounded
536  * The following optional modifiers are available:
537  *   arm_althp: handle ARM Alternative Half Precision
538  *   has_explicit_bit: has an explicit integer bit; this affects whether
539  *   the float_status floatx80_behaviour handling applies
540  */
541 typedef struct {
542     int exp_size;
543     int exp_bias;
544     int exp_re_bias;
545     int exp_max;
546     int frac_size;
547     int frac_shift;
548     bool arm_althp;
549     bool has_explicit_bit;
550     uint64_t round_mask;
551 } FloatFmt;
552 
553 /* Expand fields based on the size of exponent and fraction */
554 #define FLOAT_PARAMS_(E)                                \
555     .exp_size       = E,                                \
556     .exp_bias       = ((1 << E) - 1) >> 1,              \
557     .exp_re_bias    = (1 << (E - 1)) + (1 << (E - 2)),  \
558     .exp_max        = (1 << E) - 1
559 
560 #define FLOAT_PARAMS(E, F)                              \
561     FLOAT_PARAMS_(E),                                   \
562     .frac_size      = F,                                \
563     .frac_shift     = (-F - 1) & 63,                    \
564     .round_mask     = (1ull << ((-F - 1) & 63)) - 1
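
/*
 * Worked example for FLOAT_PARAMS(8, 23), i.e. float32:
 *   exp_size = 8, exp_bias = 127, exp_re_bias = 192, exp_max = 255,
 *   frac_size = 23, frac_shift = (-23 - 1) & 63 = 40,
 *   round_mask = (1ull << 40) - 1.
 * The 23 raw fraction bits thus occupy bits 62..40 of the decomposed
 * fraction word, directly below DECOMPOSED_BINARY_POINT, and the low
 * 40 bits are the bits that rounding must discard.
 */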
565 
566 static const FloatFmt float16_params = {
567     FLOAT_PARAMS(5, 10)
568 };
569 
570 static const FloatFmt float16_params_ahp = {
571     FLOAT_PARAMS(5, 10),
572     .arm_althp = true
573 };
574 
575 static const FloatFmt bfloat16_params = {
576     FLOAT_PARAMS(8, 7)
577 };
578 
579 static const FloatFmt float32_params = {
580     FLOAT_PARAMS(8, 23)
581 };
582 
583 static const FloatFmt float64_params = {
584     FLOAT_PARAMS(11, 52)
585 };
586 
587 static const FloatFmt float128_params = {
588     FLOAT_PARAMS(15, 112)
589 };
590 
591 #define FLOATX80_PARAMS(R)              \
592     FLOAT_PARAMS_(15),                  \
593     .frac_size = R == 64 ? 63 : R,      \
594     .frac_shift = 0,                    \
595     .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
596 
597 static const FloatFmt floatx80_params[3] = {
598     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
599     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
600     [floatx80_precision_x] = {
601         FLOATX80_PARAMS(64),
602         .has_explicit_bit = true,
603     },
604 };
605 
606 /* Unpack a float to parts, but do not canonicalize.  */
607 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
608 {
609     const int f_size = fmt->frac_size;
610     const int e_size = fmt->exp_size;
611 
612     *r = (FloatParts64) {
613         .cls = float_class_unclassified,
614         .sign = extract64(raw, f_size + e_size, 1),
615         .exp = extract64(raw, f_size, e_size),
616         .frac = extract64(raw, 0, f_size)
617     };
618 }
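
/*
 * Example: for the float32 bit pattern 0x3f800000 (1.0f), f_size = 23 and
 * e_size = 8, so this yields sign = 0, exp = 0x7f (still biased) and
 * frac = 0 (implicit bit still absent); canonicalization, declared below,
 * is what removes the bias and restores the implicit bit.
 */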
619 
620 static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
621 {
622     unpack_raw64(p, &float16_params, f);
623 }
624 
625 static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
626 {
627     unpack_raw64(p, &bfloat16_params, f);
628 }
629 
630 static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
631 {
632     unpack_raw64(p, &float32_params, f);
633 }
634 
635 static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
636 {
637     unpack_raw64(p, &float64_params, f);
638 }
639 
640 static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
641 {
642     *p = (FloatParts128) {
643         .cls = float_class_unclassified,
644         .sign = extract32(f.high, 15, 1),
645         .exp = extract32(f.high, 0, 15),
646         .frac_hi = f.low
647     };
648 }
649 
650 static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
651 {
652     const int f_size = float128_params.frac_size - 64;
653     const int e_size = float128_params.exp_size;
654 
655     *p = (FloatParts128) {
656         .cls = float_class_unclassified,
657         .sign = extract64(f.high, f_size + e_size, 1),
658         .exp = extract64(f.high, f_size, e_size),
659         .frac_hi = extract64(f.high, 0, f_size),
660         .frac_lo = f.low,
661     };
662 }
663 
664 /* Pack a float from parts, but do not canonicalize.  */
665 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
666 {
667     const int f_size = fmt->frac_size;
668     const int e_size = fmt->exp_size;
669     uint64_t ret;
670 
671     ret = (uint64_t)p->sign << (f_size + e_size);
672     ret = deposit64(ret, f_size, e_size, p->exp);
673     ret = deposit64(ret, 0, f_size, p->frac);
674     return ret;
675 }
676 
677 static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
678 {
679     return make_float16(pack_raw64(p, &float16_params));
680 }
681 
682 static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
683 {
684     return pack_raw64(p, &bfloat16_params);
685 }
686 
687 static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
688 {
689     return make_float32(pack_raw64(p, &float32_params));
690 }
691 
692 static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
693 {
694     return make_float64(pack_raw64(p, &float64_params));
695 }
696 
697 static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
698 {
699     const int f_size = float128_params.frac_size - 64;
700     const int e_size = float128_params.exp_size;
701     uint64_t hi;
702 
703     hi = (uint64_t)p->sign << (f_size + e_size);
704     hi = deposit64(hi, f_size, e_size, p->exp);
705     hi = deposit64(hi, 0, f_size, p->frac_hi);
706     return make_float128(hi, p->frac_lo);
707 }
708 
709 /*----------------------------------------------------------------------------
710 | Functions and definitions to determine:  (1) whether tininess for underflow
711 | is detected before or after rounding by default, (2) what (if anything)
712 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
713 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
714 | are propagated from function inputs to output.  These details are target-
715 | specific.
716 *----------------------------------------------------------------------------*/
717 #include "softfloat-specialize.c.inc"
718 
719 #define PARTS_GENERIC_64_128(NAME, P) \
720     _Generic((P), FloatParts64 *: parts64_##NAME, \
721                   FloatParts128 *: parts128_##NAME)
722 
723 #define PARTS_GENERIC_64_128_256(NAME, P) \
724     _Generic((P), FloatParts64 *: parts64_##NAME, \
725                   FloatParts128 *: parts128_##NAME, \
726                   FloatParts256 *: parts256_##NAME)
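
/*
 * C11 _Generic selects by the static type of the pointer, so dispatch is
 * resolved entirely at compile time.  For example (illustrative):
 *
 *   FloatParts64 p64;
 *   FloatParts128 p128;
 *
 *   parts_default_nan(&p64, s);     ... calls parts64_default_nan ...
 *   parts_default_nan(&p128, s);    ... calls parts128_default_nan ...
 */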
727 
728 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
729 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
730 
731 static void parts64_return_nan(FloatParts64 *a, float_status *s);
732 static void parts128_return_nan(FloatParts128 *a, float_status *s);
733 
734 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
735 
736 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
737                                       float_status *s);
738 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
739                                         float_status *s);
740 
741 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
742 
743 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
744                                              FloatParts64 *c, float_status *s,
745                                              int ab_mask, int abc_mask);
746 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
747                                                FloatParts128 *b,
748                                                FloatParts128 *c,
749                                                float_status *s,
750                                                int ab_mask, int abc_mask);
751 
752 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
753     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
754 
755 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
756                                  const FloatFmt *fmt);
757 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
758                                   const FloatFmt *fmt);
759 
760 #define parts_canonicalize(A, S, F) \
761     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
762 
763 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
764                                    const FloatFmt *fmt);
765 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
766                                     const FloatFmt *fmt);
767 
768 #define parts_uncanon_normal(A, S, F) \
769     PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
770 
771 static void parts64_uncanon(FloatParts64 *p, float_status *status,
772                             const FloatFmt *fmt);
773 static void parts128_uncanon(FloatParts128 *p, float_status *status,
774                              const FloatFmt *fmt);
775 
776 #define parts_uncanon(A, S, F) \
777     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
778 
779 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
780 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
781 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
782 
783 #define parts_add_normal(A, B) \
784     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
785 
786 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
787 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
788 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
789 
790 #define parts_sub_normal(A, B) \
791     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
792 
793 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
794                                     float_status *s, bool subtract);
795 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
796                                       float_status *s, bool subtract);
797 
798 #define parts_addsub(A, B, S, Z) \
799     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
800 
801 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
802                                  float_status *s);
803 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
804                                    float_status *s);
805 
806 #define parts_mul(A, B, S) \
807     PARTS_GENERIC_64_128(mul, A)(A, B, S)
808 
809 static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
810                                            FloatParts64 *c, int scale,
811                                            int flags, float_status *s);
812 static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
813                                              FloatParts128 *c, int scale,
814                                              int flags, float_status *s);
815 
816 #define parts_muladd_scalbn(A, B, C, Z, Y, S) \
817     PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)
818 
819 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
820                                  float_status *s);
821 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
822                                    float_status *s);
823 
824 #define parts_div(A, B, S) \
825     PARTS_GENERIC_64_128(div, A)(A, B, S)
826 
827 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
828                                     uint64_t *mod_quot, float_status *s);
829 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
830                                       uint64_t *mod_quot, float_status *s);
831 
832 #define parts_modrem(A, B, Q, S) \
833     PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
834 
835 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
836 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
837 
838 #define parts_sqrt(A, S, F) \
839     PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
840 
841 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
842                                         int scale, int frac_size);
843 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
844                                          int scale, int frac_size);
845 
846 #define parts_round_to_int_normal(A, R, C, F) \
847     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
848 
849 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
850                                  int scale, float_status *s,
851                                  const FloatFmt *fmt);
852 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
853                                   int scale, float_status *s,
854                                   const FloatFmt *fmt);
855 
856 #define parts_round_to_int(A, R, C, S, F) \
857     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
858 
859 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
860                                      int scale, int64_t min, int64_t max,
861                                      float_status *s);
862 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
863                                       int scale, int64_t min, int64_t max,
864                                       float_status *s);
865 
866 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
867     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
868 
869 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
870                                       int scale, uint64_t max,
871                                       float_status *s);
872 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
873                                        int scale, uint64_t max,
874                                        float_status *s);
875 
876 #define parts_float_to_uint(P, R, Z, M, S) \
877     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
878 
879 static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
880                                             FloatRoundMode rmode,
881                                             int bitsm1, float_status *s);
882 static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
883                                              FloatRoundMode rmode,
884                                              int bitsm1, float_status *s);
885 
886 #define parts_float_to_sint_modulo(P, R, M, S) \
887     PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)
888 
889 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
890                                   int scale, float_status *s);
891 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
892                                    int scale, float_status *s);
893 
897 #define parts_sint_to_float(P, I, Z, S) \
898     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
899 
900 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
901                                   int scale, float_status *s);
902 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
903                                    int scale, float_status *s);
904 
905 #define parts_uint_to_float(P, I, Z, S) \
906     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
907 
908 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
909                                     float_status *s, int flags);
910 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
911                                       float_status *s, int flags);
912 
913 #define parts_minmax(A, B, S, F) \
914     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
915 
916 static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
917                                      float_status *s, bool q);
918 static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
919                                       float_status *s, bool q);
920 
921 #define parts_compare(A, B, S, Q) \
922     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
923 
924 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
925 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
926 
927 #define parts_scalbn(A, N, S) \
928     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
929 
930 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
931 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
932 
933 #define parts_log2(A, S, F) \
934     PARTS_GENERIC_64_128(log2, A)(A, S, F)
935 
936 /*
937  * Helper functions for softfloat-parts.c.inc, per-size operations.
938  */
939 
940 #define FRAC_GENERIC_64_128(NAME, P) \
941     _Generic((P), FloatParts64 *: frac64_##NAME, \
942                   FloatParts128 *: frac128_##NAME)
943 
944 #define FRAC_GENERIC_64_128_256(NAME, P) \
945     _Generic((P), FloatParts64 *: frac64_##NAME, \
946                   FloatParts128 *: frac128_##NAME, \
947                   FloatParts256 *: frac256_##NAME)
948 
949 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
950 {
951     return uadd64_overflow(a->frac, b->frac, &r->frac);
952 }
953 
954 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
955 {
956     bool c = 0;
957     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
958     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
959     return c;
960 }
961 
962 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
963 {
964     bool c = 0;
965     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
966     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
967     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
968     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
969     return c;
970 }
971 
972 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
973 
974 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
975 {
976     return uadd64_overflow(a->frac, c, &r->frac);
977 }
978 
979 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
980 {
981     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
982     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
983 }
984 
985 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
986 
987 static void frac64_allones(FloatParts64 *a)
988 {
989     a->frac = -1;
990 }
991 
992 static void frac128_allones(FloatParts128 *a)
993 {
994     a->frac_hi = a->frac_lo = -1;
995 }
996 
997 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
998 
999 static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
1000 {
1001     return (a->frac == b->frac ? float_relation_equal
1002             : a->frac < b->frac ? float_relation_less
1003             : float_relation_greater);
1004 }
1005 
1006 static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
1007 {
1008     uint64_t ta = a->frac_hi, tb = b->frac_hi;
1009     if (ta == tb) {
1010         ta = a->frac_lo, tb = b->frac_lo;
1011         if (ta == tb) {
1012             return float_relation_equal;
1013         }
1014     }
1015     return ta < tb ? float_relation_less : float_relation_greater;
1016 }
1017 
1018 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
1019 
1020 static void frac64_clear(FloatParts64 *a)
1021 {
1022     a->frac = 0;
1023 }
1024 
1025 static void frac128_clear(FloatParts128 *a)
1026 {
1027     a->frac_hi = a->frac_lo = 0;
1028 }
1029 
1030 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
1031 
1032 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
1033 {
1034     uint64_t n1, n0, r, q;
1035     bool ret;
1036 
1037     /*
1038      * We want a 2*N / N-bit division to produce exactly an N-bit
1039      * result, so that we do not lose any precision and so that we
1040      * do not have to renormalize afterward.  If A.frac < B.frac,
1041      * then division would produce an (N-1)-bit result; shift A left
1042      * by one to produce an N-bit result, and return true to
1043      * decrement the exponent to match.
1044      *
1045      * The udiv_qrnnd algorithm that we're using requires normalization,
1046      * i.e. the msb of the denominator must be set, which is already true.
1047      */
1048     ret = a->frac < b->frac;
1049     if (ret) {
1050         n0 = a->frac;
1051         n1 = 0;
1052     } else {
1053         n0 = a->frac >> 1;
1054         n1 = a->frac << 63;
1055     }
1056     q = udiv_qrnnd(&r, n0, n1, b->frac);
1057 
1058     /* Set lsb if there is a remainder, to set inexact. */
1059     a->frac = q | (r != 0);
1060 
1061     return ret;
1062 }
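
/*
 * Worked example: 1.5 / 1.0 arrives here as
 *   a->frac = 0xC000000000000000, b->frac = 0x8000000000000000.
 * A.frac >= B.frac, so the 128-bit numerator is A.frac << 63, i.e.
 *   n0 : n1 = 0x6000000000000000 : 0  (high : low)
 * and udiv_qrnnd returns q = 0xC000000000000000 with r = 0: the result
 * is 1.5 in decomposed form, ret == false (no exponent adjustment), and
 * no sticky lsb is set because the division was exact.
 */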
1063 
1064 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1065 {
1066     uint64_t q0, q1, a0, a1, b0, b1;
1067     uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1068     bool ret = false;
1069 
1070     a0 = a->frac_hi, a1 = a->frac_lo;
1071     b0 = b->frac_hi, b1 = b->frac_lo;
1072 
1073     ret = lt128(a0, a1, b0, b1);
1074     if (!ret) {
1075         a1 = shr_double(a0, a1, 1);
1076         a0 = a0 >> 1;
1077     }
1078 
1079     /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1080     q0 = estimateDiv128To64(a0, a1, b0);
1081 
1082     /*
1083      * Estimate is high because B1 was not included (unless B1 == 0).
1084      * Reduce quotient and increase remainder until remainder is non-negative.
1085      * This loop will execute 0 to 2 times.
1086      */
1087     mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1088     sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1089     while (r0 != 0) {
1090         q0--;
1091         add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1092     }
1093 
1094     /* Repeat using the remainder, producing a second word of quotient. */
1095     q1 = estimateDiv128To64(r1, r2, b0);
1096     mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1097     sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1098     while (r1 != 0) {
1099         q1--;
1100         add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1101     }
1102 
1103     /* Any remainder indicates inexact; set sticky bit. */
1104     q1 |= (r2 | r3) != 0;
1105 
1106     a->frac_hi = q0;
1107     a->frac_lo = q1;
1108     return ret;
1109 }
1110 
1111 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1112 
1113 static bool frac64_eqz(FloatParts64 *a)
1114 {
1115     return a->frac == 0;
1116 }
1117 
1118 static bool frac128_eqz(FloatParts128 *a)
1119 {
1120     return (a->frac_hi | a->frac_lo) == 0;
1121 }
1122 
1123 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1124 
1125 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1126 {
1127     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1128 }
1129 
1130 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1131 {
1132     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1133                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1134 }
1135 
1136 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1137 
1138 static void frac64_neg(FloatParts64 *a)
1139 {
1140     a->frac = -a->frac;
1141 }
1142 
1143 static void frac128_neg(FloatParts128 *a)
1144 {
1145     bool c = 0;
1146     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1147     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1148 }
1149 
1150 static void frac256_neg(FloatParts256 *a)
1151 {
1152     bool c = 0;
1153     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1154     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1155     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1156     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1157 }
1158 
1159 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1160 
1161 static int frac64_normalize(FloatParts64 *a)
1162 {
1163     if (a->frac) {
1164         int shift = clz64(a->frac);
1165         a->frac <<= shift;
1166         return shift;
1167     }
1168     return 64;
1169 }
1170 
1171 static int frac128_normalize(FloatParts128 *a)
1172 {
1173     if (a->frac_hi) {
1174         int shl = clz64(a->frac_hi);
1175         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1176         a->frac_lo <<= shl;
1177         return shl;
1178     } else if (a->frac_lo) {
1179         int shl = clz64(a->frac_lo);
1180         a->frac_hi = a->frac_lo << shl;
1181         a->frac_lo = 0;
1182         return shl + 64;
1183     }
1184     return 128;
1185 }
1186 
1187 static int frac256_normalize(FloatParts256 *a)
1188 {
1189     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1190     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1191     int ret, shl;
1192 
1193     if (likely(a0)) {
1194         shl = clz64(a0);
1195         if (shl == 0) {
1196             return 0;
1197         }
1198         ret = shl;
1199     } else {
1200         if (a1) {
1201             ret = 64;
1202             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1203         } else if (a2) {
1204             ret = 128;
1205             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1206         } else if (a3) {
1207             ret = 192;
1208             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1209         } else {
1210             ret = 256;
1211             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1212             goto done;
1213         }
1214         shl = clz64(a0);
1215         if (shl == 0) {
1216             goto done;
1217         }
1218         ret += shl;
1219     }
1220 
1221     a0 = shl_double(a0, a1, shl);
1222     a1 = shl_double(a1, a2, shl);
1223     a2 = shl_double(a2, a3, shl);
1224     a3 <<= shl;
1225 
1226  done:
1227     a->frac_hi = a0;
1228     a->frac_hm = a1;
1229     a->frac_lm = a2;
1230     a->frac_lo = a3;
1231     return ret;
1232 }
1233 
1234 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1235 
1236 static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1237 {
1238     uint64_t a0, a1, b0, t0, t1, q, quot;
1239     int exp_diff = a->exp - b->exp;
1240     int shift;
1241 
1242     a0 = a->frac;
1243     a1 = 0;
1244 
1245     if (exp_diff < -1) {
1246         if (mod_quot) {
1247             *mod_quot = 0;
1248         }
1249         return;
1250     }
1251     if (exp_diff == -1) {
1252         a0 >>= 1;
1253         exp_diff = 0;
1254     }
1255 
1256     b0 = b->frac;
1257     quot = q = b0 <= a0;
1258     if (q) {
1259         a0 -= b0;
1260     }
1261 
1262     exp_diff -= 64;
1263     while (exp_diff > 0) {
1264         q = estimateDiv128To64(a0, a1, b0);
1265         q = q > 2 ? q - 2 : 0;
1266         mul64To128(b0, q, &t0, &t1);
1267         sub128(a0, a1, t0, t1, &a0, &a1);
1268         shortShift128Left(a0, a1, 62, &a0, &a1);
1269         exp_diff -= 62;
1270         quot = (quot << 62) + q;
1271     }
1272 
1273     exp_diff += 64;
1274     if (exp_diff > 0) {
1275         q = estimateDiv128To64(a0, a1, b0);
1276         q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1277         mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1278         sub128(a0, a1, t0, t1, &a0, &a1);
1279         shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1280         while (le128(t0, t1, a0, a1)) {
1281             ++q;
1282             sub128(a0, a1, t0, t1, &a0, &a1);
1283         }
1284         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1285     } else {
1286         t0 = b0;
1287         t1 = 0;
1288     }
1289 
1290     if (mod_quot) {
1291         *mod_quot = quot;
1292     } else {
1293         sub128(t0, t1, a0, a1, &t0, &t1);
1294         if (lt128(t0, t1, a0, a1) ||
1295             (eq128(t0, t1, a0, a1) && (q & 1))) {
1296             a0 = t0;
1297             a1 = t1;
1298             a->sign = !a->sign;
1299         }
1300     }
1301 
1302     if (likely(a0)) {
1303         shift = clz64(a0);
1304         shortShift128Left(a0, a1, shift, &a0, &a1);
1305     } else if (likely(a1)) {
1306         shift = clz64(a1);
1307         a0 = a1 << shift;
1308         a1 = 0;
1309         shift += 64;
1310     } else {
1311         a->cls = float_class_zero;
1312         return;
1313     }
1314 
1315     a->exp = b->exp + exp_diff - shift;
1316     a->frac = a0 | (a1 != 0);
1317 }
1318 
1319 static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1320                            uint64_t *mod_quot)
1321 {
1322     uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1323     int exp_diff = a->exp - b->exp;
1324     int shift;
1325 
1326     a0 = a->frac_hi;
1327     a1 = a->frac_lo;
1328     a2 = 0;
1329 
1330     if (exp_diff < -1) {
1331         if (mod_quot) {
1332             *mod_quot = 0;
1333         }
1334         return;
1335     }
1336     if (exp_diff == -1) {
1337         shift128Right(a0, a1, 1, &a0, &a1);
1338         exp_diff = 0;
1339     }
1340 
1341     b0 = b->frac_hi;
1342     b1 = b->frac_lo;
1343 
1344     quot = q = le128(b0, b1, a0, a1);
1345     if (q) {
1346         sub128(a0, a1, b0, b1, &a0, &a1);
1347     }
1348 
1349     exp_diff -= 64;
1350     while (exp_diff > 0) {
1351         q = estimateDiv128To64(a0, a1, b0);
1352         q = q > 4 ? q - 4 : 0;
1353         mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1354         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1355         shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1356         exp_diff -= 61;
1357         quot = (quot << 61) + q;
1358     }
1359 
1360     exp_diff += 64;
1361     if (exp_diff > 0) {
1362         q = estimateDiv128To64(a0, a1, b0);
1363         q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1364         mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1365         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1366         shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1367         while (le192(t0, t1, t2, a0, a1, a2)) {
1368             ++q;
1369             sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1370         }
1371         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1372     } else {
1373         t0 = b0;
1374         t1 = b1;
1375         t2 = 0;
1376     }
1377 
1378     if (mod_quot) {
1379         *mod_quot = quot;
1380     } else {
1381         sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1382         if (lt192(t0, t1, t2, a0, a1, a2) ||
1383             (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1384             a0 = t0;
1385             a1 = t1;
1386             a2 = t2;
1387             a->sign = !a->sign;
1388         }
1389     }
1390 
1391     if (likely(a0)) {
1392         shift = clz64(a0);
1393         shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1394     } else if (likely(a1)) {
1395         shift = clz64(a1);
1396         shortShift128Left(a1, a2, shift, &a0, &a1);
1397         a2 = 0;
1398         shift += 64;
1399     } else if (likely(a2)) {
1400         shift = clz64(a2);
1401         a0 = a2 << shift;
1402         a1 = a2 = 0;
1403         shift += 128;
1404     } else {
1405         a->cls = float_class_zero;
1406         return;
1407     }
1408 
1409     a->exp = b->exp + exp_diff - shift;
1410     a->frac_hi = a0;
1411     a->frac_lo = a1 | (a2 != 0);
1412 }
1413 
1414 #define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1415 
1416 static void frac64_shl(FloatParts64 *a, int c)
1417 {
1418     a->frac <<= c;
1419 }
1420 
1421 static void frac128_shl(FloatParts128 *a, int c)
1422 {
1423     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1424 
1425     if (c & 64) {
1426         a0 = a1, a1 = 0;
1427     }
1428 
1429     c &= 63;
1430     if (c) {
1431         a0 = shl_double(a0, a1, c);
1432         a1 = a1 << c;
1433     }
1434 
1435     a->frac_hi = a0;
1436     a->frac_lo = a1;
1437 }
1438 
1439 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1440 
1441 static void frac64_shr(FloatParts64 *a, int c)
1442 {
1443     a->frac >>= c;
1444 }
1445 
1446 static void frac128_shr(FloatParts128 *a, int c)
1447 {
1448     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1449 
1450     if (c & 64) {
1451         a1 = a0, a0 = 0;
1452     }
1453 
1454     c &= 63;
1455     if (c) {
1456         a1 = shr_double(a0, a1, c);
1457         a0 = a0 >> c;
1458     }
1459 
1460     a->frac_hi = a0;
1461     a->frac_lo = a1;
1462 }
1463 
1464 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1465 
1466 static void frac64_shrjam(FloatParts64 *a, int c)
1467 {
1468     uint64_t a0 = a->frac;
1469 
1470     if (likely(c != 0)) {
1471         if (likely(c < 64)) {
1472             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1473         } else {
1474             a0 = a0 != 0;
1475         }
1476         a->frac = a0;
1477     }
1478 }
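
/*
 * "Jam" means that any bits shifted out are ORed into the least
 * significant bit, so that later rounding can still see that the result
 * is inexact.  For example, with c = 4:
 *   0xF000000000000001  ->  0x0F00000000000001
 * where a plain shift would lose the low set bit and give
 * 0x0F00000000000000.
 */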
1479 
1480 static void frac128_shrjam(FloatParts128 *a, int c)
1481 {
1482     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1483     uint64_t sticky = 0;
1484 
1485     if (unlikely(c == 0)) {
1486         return;
1487     } else if (likely(c < 64)) {
1488         /* nothing */
1489     } else if (likely(c < 128)) {
1490         sticky = a1;
1491         a1 = a0;
1492         a0 = 0;
1493         c &= 63;
1494         if (c == 0) {
1495             goto done;
1496         }
1497     } else {
1498         sticky = a0 | a1;
1499         a0 = a1 = 0;
1500         goto done;
1501     }
1502 
1503     sticky |= shr_double(a1, 0, c);
1504     a1 = shr_double(a0, a1, c);
1505     a0 = a0 >> c;
1506 
1507  done:
1508     a->frac_lo = a1 | (sticky != 0);
1509     a->frac_hi = a0;
1510 }
1511 
1512 static void frac256_shrjam(FloatParts256 *a, int c)
1513 {
1514     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1515     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1516     uint64_t sticky = 0;
1517 
1518     if (unlikely(c == 0)) {
1519         return;
1520     } else if (likely(c < 64)) {
1521         /* nothing */
1522     } else if (likely(c < 256)) {
1523         if (unlikely(c & 128)) {
1524             sticky |= a2 | a3;
1525             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1526         }
1527         if (unlikely(c & 64)) {
1528             sticky |= a3;
1529             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1530         }
1531         c &= 63;
1532         if (c == 0) {
1533             goto done;
1534         }
1535     } else {
1536         sticky = a0 | a1 | a2 | a3;
1537         a0 = a1 = a2 = a3 = 0;
1538         goto done;
1539     }
1540 
1541     sticky |= shr_double(a3, 0, c);
1542     a3 = shr_double(a2, a3, c);
1543     a2 = shr_double(a1, a2, c);
1544     a1 = shr_double(a0, a1, c);
1545     a0 = a0 >> c;
1546 
1547  done:
1548     a->frac_lo = a3 | (sticky != 0);
1549     a->frac_lm = a2;
1550     a->frac_hm = a1;
1551     a->frac_hi = a0;
1552 }
1553 
1554 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1555 
1556 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1557 {
1558     return usub64_overflow(a->frac, b->frac, &r->frac);
1559 }
1560 
1561 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1562 {
1563     bool c = 0;
1564     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1565     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1566     return c;
1567 }
1568 
1569 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1570 {
1571     bool c = 0;
1572     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1573     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1574     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1575     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1576     return c;
1577 }
1578 
1579 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1580 
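/*
 * Truncate a wide fraction to a narrower one, folding any nonzero
 * discarded bits into the sticky lsb of the narrow result.
 */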
1581 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1582 {
1583     r->frac = a->frac_hi | (a->frac_lo != 0);
1584 }
1585 
1586 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1587 {
1588     r->frac_hi = a->frac_hi;
1589     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1590 }
1591 
1592 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1593 
1594 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1595 {
1596     r->frac_hi = a->frac;
1597     r->frac_lo = 0;
1598 }
1599 
1600 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1601 {
1602     r->frac_hi = a->frac_hi;
1603     r->frac_hm = a->frac_lo;
1604     r->frac_lm = 0;
1605     r->frac_lo = 0;
1606 }
1607 
1608 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1609 
1610 /*
1611  * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
1612  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1613  * and thus MIT licensed.
1614  */
1615 static const uint16_t rsqrt_tab[128] = {
1616     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1617     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1618     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1619     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1620     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1621     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1622     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1623     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1624     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1625     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1626     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1627     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1628     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1629     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1630     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1631     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1632 };
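/*
 * The table is indexed by one exponent bit and the six most significant
 * fraction bits, giving a first approximation to the reciprocal square
 * root that the sqrt code instantiated below refines iteratively, per
 * the scheme used by musl.
 */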
1633 
1634 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1635 #define FloatPartsN    glue(FloatParts,N)
1636 #define FloatPartsW    glue(FloatParts,W)
1637 
1638 #define N 64
1639 #define W 128
1640 
1641 #include "softfloat-parts-addsub.c.inc"
1642 #include "softfloat-parts.c.inc"
1643 
1644 #undef  N
1645 #undef  W
1646 #define N 128
1647 #define W 256
1648 
1649 #include "softfloat-parts-addsub.c.inc"
1650 #include "softfloat-parts.c.inc"
1651 
1652 #undef  N
1653 #undef  W
1654 #define N            256
1655 
1656 #include "softfloat-parts-addsub.c.inc"
1657 
1658 #undef  N
1659 #undef  W
1660 #undef  partsN
1661 #undef  FloatPartsN
1662 #undef  FloatPartsW
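/*
 * The includes above instantiate the generic algorithms once per
 * width: partsN(NAME) expands to parts64_NAME, parts128_NAME or
 * parts256_NAME as appropriate, and FloatPartsW names the double-width
 * type that softfloat-parts.c.inc widens into for intermediates such
 * as muladd.  The 256-bit instantiation is only needed for add/sub
 * (as the widened form of the 128-bit muladd), so no W is defined
 * for it.
 */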
1663 
1664 /*
1665  * Pack/unpack routines with a specific FloatFmt.
1666  */
1667 
1668 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1669                                       float_status *s, const FloatFmt *params)
1670 {
1671     float16_unpack_raw(p, f);
1672     parts_canonicalize(p, s, params);
1673 }
1674 
1675 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1676                                      float_status *s)
1677 {
1678     float16a_unpack_canonical(p, f, s, &float16_params);
1679 }
1680 
1681 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1682                                       float_status *s)
1683 {
1684     bfloat16_unpack_raw(p, f);
1685     parts_canonicalize(p, s, &bfloat16_params);
1686 }
1687 
1688 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1689                                              float_status *s,
1690                                              const FloatFmt *params)
1691 {
1692     parts_uncanon(p, s, params);
1693     return float16_pack_raw(p);
1694 }
1695 
1696 static float16 float16_round_pack_canonical(FloatParts64 *p,
1697                                             float_status *s)
1698 {
1699     return float16a_round_pack_canonical(p, s, &float16_params);
1700 }
1701 
1702 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1703                                               float_status *s)
1704 {
1705     parts_uncanon(p, s, &bfloat16_params);
1706     return bfloat16_pack_raw(p);
1707 }
1708 
1709 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1710                                      float_status *s)
1711 {
1712     float32_unpack_raw(p, f);
1713     parts_canonicalize(p, s, &float32_params);
1714 }
1715 
1716 static float32 float32_round_pack_canonical(FloatParts64 *p,
1717                                             float_status *s)
1718 {
1719     parts_uncanon(p, s, &float32_params);
1720     return float32_pack_raw(p);
1721 }
1722 
1723 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1724                                      float_status *s)
1725 {
1726     float64_unpack_raw(p, f);
1727     parts_canonicalize(p, s, &float64_params);
1728 }
1729 
1730 static float64 float64_round_pack_canonical(FloatParts64 *p,
1731                                             float_status *s)
1732 {
1733     parts_uncanon(p, s, &float64_params);
1734     return float64_pack_raw(p);
1735 }
1736 
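/*
 * Round and pack a float64 to float32 range and precision, keeping
 * the float64 representation; used by targets (e.g. PPC) whose
 * single-precision operations produce results in double-precision
 * registers.
 */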
1737 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1738                                                float_status *s)
1739 {
1740     parts_uncanon(p, s, &float32_params);
1741 
1742     /*
1743      * In parts_uncanon, we placed the fraction for float32 at the lsb.
1744      * We need to adjust the fraction higher so that the least N bits are
1745      * zero, and the fraction is adjacent to the float64 implicit bit.
1746      */
1747     switch (p->cls) {
1748     case float_class_normal:
1749     case float_class_denormal:
1750         if (unlikely(p->exp == 0)) {
1751             /*
1752              * The result is denormal for float32, but can be represented
1753              * in normalized form for float64.  Renormalize, as parts_canonicalize would.
1754              */
1755             int shift = frac_normalize(p);
1756             p->exp = (float32_params.frac_shift -
1757                       float32_params.exp_bias - shift + 1 +
1758                       float64_params.exp_bias);
1759             frac_shr(p, float64_params.frac_shift);
1760         } else {
1761             frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1762             p->exp += float64_params.exp_bias - float32_params.exp_bias;
1763         }
1764         break;
1765     case float_class_snan:
1766     case float_class_qnan:
1767         frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1768         p->exp = float64_params.exp_max;
1769         break;
1770     case float_class_inf:
1771         p->exp = float64_params.exp_max;
1772         break;
1773     case float_class_zero:
1774         break;
1775     default:
1776         g_assert_not_reached();
1777     }
1778 
1779     return float64_pack_raw(p);
1780 }
1781 
1782 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1783                                       float_status *s)
1784 {
1785     float128_unpack_raw(p, f);
1786     parts_canonicalize(p, s, &float128_params);
1787 }
1788 
1789 static float128 float128_round_pack_canonical(FloatParts128 *p,
1790                                               float_status *s)
1791 {
1792     parts_uncanon(p, s, &float128_params);
1793     return float128_pack_raw(p);
1794 }
1795 
1796 /* Returns false if the encoding is invalid. */
1797 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1798                                       float_status *s)
1799 {
1800     /* Ensure rounding precision is set before beginning. */
1801     switch (s->floatx80_rounding_precision) {
1802     case floatx80_precision_x:
1803     case floatx80_precision_d:
1804     case floatx80_precision_s:
1805         break;
1806     default:
1807         g_assert_not_reached();
1808     }
1809 
1810     if (unlikely(floatx80_invalid_encoding(f, s))) {
1811         float_raise(float_flag_invalid, s);
1812         return false;
1813     }
1814 
1815     floatx80_unpack_raw(p, f);
1816 
1817     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1818         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1819     } else {
1820         /* The explicit integer bit is ignored after the invalid check. */
1821         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1822         p->cls = (p->frac_hi == 0 ? float_class_inf
1823                   : parts_is_snan_frac(p->frac_hi, s)
1824                   ? float_class_snan : float_class_qnan);
1825     }
1826     return true;
1827 }
1828 
1829 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1830                                               float_status *s)
1831 {
1832     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1833     uint64_t frac;
1834     int exp;
1835 
1836     switch (p->cls) {
1837     case float_class_normal:
1838     case float_class_denormal:
1839         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1840             parts_uncanon_normal(p, s, fmt);
1841             frac = p->frac_hi;
1842             exp = p->exp;
1843         } else {
1844             FloatParts64 p64;
1845 
1846             p64.sign = p->sign;
1847             p64.exp = p->exp;
1848             frac_truncjam(&p64, p);
1849             parts_uncanon_normal(&p64, s, fmt);
1850             frac = p64.frac;
1851             exp = p64.exp;
1852         }
1853         if (exp != fmt->exp_max) {
1854             break;
1855         }
1856         /* rounded to inf -- fall through to set frac correctly */
1857 
1858     case float_class_inf:
1859         /* x86 and m68k differ in the setting of the integer bit. */
1860         frac = s->floatx80_behaviour & floatx80_default_inf_int_bit_is_zero ?
1861             0 : (1ULL << 63);
1862         exp = fmt->exp_max;
1863         break;
1864 
1865     case float_class_zero:
1866         frac = 0;
1867         exp = 0;
1868         break;
1869 
1870     case float_class_snan:
1871     case float_class_qnan:
1872         /* NaNs have the integer bit set. */
1873         frac = p->frac_hi | (1ull << 63);
1874         exp = fmt->exp_max;
1875         break;
1876 
1877     default:
1878         g_assert_not_reached();
1879     }
1880 
1881     return packFloatx80(p->sign, exp, frac);
1882 }
1883 
1884 /*
1885  * Addition and subtraction
1886  */
1887 
1888 static float16 QEMU_FLATTEN
1889 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1890 {
1891     FloatParts64 pa, pb, *pr;
1892 
1893     float16_unpack_canonical(&pa, a, status);
1894     float16_unpack_canonical(&pb, b, status);
1895     pr = parts_addsub(&pa, &pb, status, subtract);
1896 
1897     return float16_round_pack_canonical(pr, status);
1898 }
1899 
1900 float16 float16_add(float16 a, float16 b, float_status *status)
1901 {
1902     return float16_addsub(a, b, status, false);
1903 }
1904 
1905 float16 float16_sub(float16 a, float16 b, float_status *status)
1906 {
1907     return float16_addsub(a, b, status, true);
1908 }
1909 
1910 static float32 QEMU_SOFTFLOAT_ATTR
1911 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1912 {
1913     FloatParts64 pa, pb, *pr;
1914 
1915     float32_unpack_canonical(&pa, a, status);
1916     float32_unpack_canonical(&pb, b, status);
1917     pr = parts_addsub(&pa, &pb, status, subtract);
1918 
1919     return float32_round_pack_canonical(pr, status);
1920 }
1921 
1922 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1923 {
1924     return soft_f32_addsub(a, b, status, false);
1925 }
1926 
1927 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1928 {
1929     return soft_f32_addsub(a, b, status, true);
1930 }
1931 
1932 static float64 QEMU_SOFTFLOAT_ATTR
1933 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1934 {
1935     FloatParts64 pa, pb, *pr;
1936 
1937     float64_unpack_canonical(&pa, a, status);
1938     float64_unpack_canonical(&pb, b, status);
1939     pr = parts_addsub(&pa, &pb, status, subtract);
1940 
1941     return float64_round_pack_canonical(pr, status);
1942 }
1943 
1944 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1945 {
1946     return soft_f64_addsub(a, b, status, false);
1947 }
1948 
1949 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1950 {
1951     return soft_f64_addsub(a, b, status, true);
1952 }
1953 
1954 static float hard_f32_add(float a, float b)
1955 {
1956     return a + b;
1957 }
1958 
1959 static float hard_f32_sub(float a, float b)
1960 {
1961     return a - b;
1962 }
1963 
1964 static double hard_f64_add(double a, double b)
1965 {
1966     return a + b;
1967 }
1968 
1969 static double hard_f64_sub(double a, double b)
1970 {
1971     return a - b;
1972 }
1973 
1974 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1975 {
1976     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1977         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1978     }
1979     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1980 }
1981 
1982 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1983 {
1984     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1985         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1986     } else {
1987         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1988     }
1989 }
1990 
1991 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1992                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1993 {
1994     return float32_gen2(a, b, s, hard, soft,
1995                         f32_is_zon2, f32_addsubmul_post);
1996 }
1997 
1998 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1999                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
2000 {
2001     return float64_gen2(a, b, s, hard, soft,
2002                         f64_is_zon2, f64_addsubmul_post);
2003 }
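/*
 * float32_gen2/float64_gen2 (defined earlier in this file) implement
 * the hardfloat fast path: the "pre" predicate (here zero-or-normal
 * inputs) gates use of the host FPU, an infinite result raises the
 * overflow flag, and the "post" predicate decides whether a tiny
 * result must be recomputed in softfloat.  For add, sub and mul a
 * tiny result is trusted only when both operands are zero, since
 * only then is it guaranteed to be an exact zero.
 */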
2004 
2005 float32 QEMU_FLATTEN
2006 float32_add(float32 a, float32 b, float_status *s)
2007 {
2008     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
2009 }
2010 
2011 float32 QEMU_FLATTEN
2012 float32_sub(float32 a, float32 b, float_status *s)
2013 {
2014     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
2015 }
2016 
2017 float64 QEMU_FLATTEN
2018 float64_add(float64 a, float64 b, float_status *s)
2019 {
2020     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
2021 }
2022 
2023 float64 QEMU_FLATTEN
2024 float64_sub(float64 a, float64 b, float_status *s)
2025 {
2026     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
2027 }
2028 
2029 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
2030                                  bool subtract)
2031 {
2032     FloatParts64 pa, pb, *pr;
2033 
2034     float64_unpack_canonical(&pa, a, status);
2035     float64_unpack_canonical(&pb, b, status);
2036     pr = parts_addsub(&pa, &pb, status, subtract);
2037 
2038     return float64r32_round_pack_canonical(pr, status);
2039 }
2040 
2041 float64 float64r32_add(float64 a, float64 b, float_status *status)
2042 {
2043     return float64r32_addsub(a, b, status, false);
2044 }
2045 
2046 float64 float64r32_sub(float64 a, float64 b, float_status *status)
2047 {
2048     return float64r32_addsub(a, b, status, true);
2049 }
2050 
2051 static bfloat16 QEMU_FLATTEN
2052 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2053 {
2054     FloatParts64 pa, pb, *pr;
2055 
2056     bfloat16_unpack_canonical(&pa, a, status);
2057     bfloat16_unpack_canonical(&pb, b, status);
2058     pr = parts_addsub(&pa, &pb, status, subtract);
2059 
2060     return bfloat16_round_pack_canonical(pr, status);
2061 }
2062 
2063 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2064 {
2065     return bfloat16_addsub(a, b, status, false);
2066 }
2067 
2068 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2069 {
2070     return bfloat16_addsub(a, b, status, true);
2071 }
2072 
2073 static float128 QEMU_FLATTEN
2074 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2075 {
2076     FloatParts128 pa, pb, *pr;
2077 
2078     float128_unpack_canonical(&pa, a, status);
2079     float128_unpack_canonical(&pb, b, status);
2080     pr = parts_addsub(&pa, &pb, status, subtract);
2081 
2082     return float128_round_pack_canonical(pr, status);
2083 }
2084 
2085 float128 float128_add(float128 a, float128 b, float_status *status)
2086 {
2087     return float128_addsub(a, b, status, false);
2088 }
2089 
2090 float128 float128_sub(float128 a, float128 b, float_status *status)
2091 {
2092     return float128_addsub(a, b, status, true);
2093 }
2094 
2095 static floatx80 QEMU_FLATTEN
2096 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2097 {
2098     FloatParts128 pa, pb, *pr;
2099 
2100     if (!floatx80_unpack_canonical(&pa, a, status) ||
2101         !floatx80_unpack_canonical(&pb, b, status)) {
2102         return floatx80_default_nan(status);
2103     }
2104 
2105     pr = parts_addsub(&pa, &pb, status, subtract);
2106     return floatx80_round_pack_canonical(pr, status);
2107 }
2108 
2109 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2110 {
2111     return floatx80_addsub(a, b, status, false);
2112 }
2113 
2114 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2115 {
2116     return floatx80_addsub(a, b, status, true);
2117 }
2118 
2119 /*
2120  * Multiplication
2121  */
2122 
2123 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2124 {
2125     FloatParts64 pa, pb, *pr;
2126 
2127     float16_unpack_canonical(&pa, a, status);
2128     float16_unpack_canonical(&pb, b, status);
2129     pr = parts_mul(&pa, &pb, status);
2130 
2131     return float16_round_pack_canonical(pr, status);
2132 }
2133 
2134 static float32 QEMU_SOFTFLOAT_ATTR
2135 soft_f32_mul(float32 a, float32 b, float_status *status)
2136 {
2137     FloatParts64 pa, pb, *pr;
2138 
2139     float32_unpack_canonical(&pa, a, status);
2140     float32_unpack_canonical(&pb, b, status);
2141     pr = parts_mul(&pa, &pb, status);
2142 
2143     return float32_round_pack_canonical(pr, status);
2144 }
2145 
2146 static float64 QEMU_SOFTFLOAT_ATTR
2147 soft_f64_mul(float64 a, float64 b, float_status *status)
2148 {
2149     FloatParts64 pa, pb, *pr;
2150 
2151     float64_unpack_canonical(&pa, a, status);
2152     float64_unpack_canonical(&pb, b, status);
2153     pr = parts_mul(&pa, &pb, status);
2154 
2155     return float64_round_pack_canonical(pr, status);
2156 }
2157 
2158 static float hard_f32_mul(float a, float b)
2159 {
2160     return a * b;
2161 }
2162 
2163 static double hard_f64_mul(double a, double b)
2164 {
2165     return a * b;
2166 }
2167 
2168 float32 QEMU_FLATTEN
2169 float32_mul(float32 a, float32 b, float_status *s)
2170 {
2171     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2172                         f32_is_zon2, f32_addsubmul_post);
2173 }
2174 
2175 float64 QEMU_FLATTEN
2176 float64_mul(float64 a, float64 b, float_status *s)
2177 {
2178     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2179                         f64_is_zon2, f64_addsubmul_post);
2180 }
2181 
2182 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2183 {
2184     FloatParts64 pa, pb, *pr;
2185 
2186     float64_unpack_canonical(&pa, a, status);
2187     float64_unpack_canonical(&pb, b, status);
2188     pr = parts_mul(&pa, &pb, status);
2189 
2190     return float64r32_round_pack_canonical(pr, status);
2191 }
2192 
2193 bfloat16 QEMU_FLATTEN
2194 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2195 {
2196     FloatParts64 pa, pb, *pr;
2197 
2198     bfloat16_unpack_canonical(&pa, a, status);
2199     bfloat16_unpack_canonical(&pb, b, status);
2200     pr = parts_mul(&pa, &pb, status);
2201 
2202     return bfloat16_round_pack_canonical(pr, status);
2203 }
2204 
2205 float128 QEMU_FLATTEN
2206 float128_mul(float128 a, float128 b, float_status *status)
2207 {
2208     FloatParts128 pa, pb, *pr;
2209 
2210     float128_unpack_canonical(&pa, a, status);
2211     float128_unpack_canonical(&pb, b, status);
2212     pr = parts_mul(&pa, &pb, status);
2213 
2214     return float128_round_pack_canonical(pr, status);
2215 }
2216 
2217 floatx80 QEMU_FLATTEN
2218 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2219 {
2220     FloatParts128 pa, pb, *pr;
2221 
2222     if (!floatx80_unpack_canonical(&pa, a, status) ||
2223         !floatx80_unpack_canonical(&pb, b, status)) {
2224         return floatx80_default_nan(status);
2225     }
2226 
2227     pr = parts_mul(&pa, &pb, status);
2228     return floatx80_round_pack_canonical(pr, status);
2229 }
2230 
2231 /*
2232  * Fused multiply-add
2233  */
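/*
 * The *_muladd_scalbn variants compute (a * b) + c with a single
 * rounding, scaling the intermediate result by 2^scale before that
 * rounding; the plain *_muladd wrappers pass scale == 0.  'flags' is
 * a mask of float_muladd_* bits selecting negation of the product,
 * the addend, or the final result.
 */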
2234 
2235 float16 QEMU_FLATTEN
2236 float16_muladd_scalbn(float16 a, float16 b, float16 c,
2237                       int scale, int flags, float_status *status)
2238 {
2239     FloatParts64 pa, pb, pc, *pr;
2240 
2241     float16_unpack_canonical(&pa, a, status);
2242     float16_unpack_canonical(&pb, b, status);
2243     float16_unpack_canonical(&pc, c, status);
2244     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2245 
2246     return float16_round_pack_canonical(pr, status);
2247 }
2248 
2249 float16 float16_muladd(float16 a, float16 b, float16 c,
2250                        int flags, float_status *status)
2251 {
2252     return float16_muladd_scalbn(a, b, c, 0, flags, status);
2253 }
2254 
2255 float32 QEMU_SOFTFLOAT_ATTR
2256 float32_muladd_scalbn(float32 a, float32 b, float32 c,
2257                       int scale, int flags, float_status *status)
2258 {
2259     FloatParts64 pa, pb, pc, *pr;
2260 
2261     float32_unpack_canonical(&pa, a, status);
2262     float32_unpack_canonical(&pb, b, status);
2263     float32_unpack_canonical(&pc, c, status);
2264     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2265 
2266     return float32_round_pack_canonical(pr, status);
2267 }
2268 
2269 float64 QEMU_SOFTFLOAT_ATTR
2270 float64_muladd_scalbn(float64 a, float64 b, float64 c,
2271                       int scale, int flags, float_status *status)
2272 {
2273     FloatParts64 pa, pb, pc, *pr;
2274 
2275     float64_unpack_canonical(&pa, a, status);
2276     float64_unpack_canonical(&pb, b, status);
2277     float64_unpack_canonical(&pc, c, status);
2278     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2279 
2280     return float64_round_pack_canonical(pr, status);
2281 }
2282 
2283 static bool force_soft_fma;
2284 
2285 float32 QEMU_FLATTEN
2286 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2287 {
2288     union_float32 ua, ub, uc, ur;
2289 
2290     ua.s = xa;
2291     ub.s = xb;
2292     uc.s = xc;
2293 
2294     if (unlikely(!can_use_fpu(s))) {
2295         goto soft;
2296     }
2297     if (unlikely(flags & float_muladd_suppress_add_product_zero)) {
2298         goto soft;
2299     }
2300 
2301     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2302     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2303         goto soft;
2304     }
2305 
2306     if (unlikely(force_soft_fma)) {
2307         goto soft;
2308     }
2309 
2310     /*
2311      * When a == 0 or b == 0, there's no need to check for under/overflow,
2312      * since we know the addend is (normal || 0) and the product is 0.
2313      */
2314     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2315         union_float32 up;
2316         bool prod_sign;
2317 
2318         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2319         prod_sign ^= !!(flags & float_muladd_negate_product);
2320         up.s = float32_set_sign(float32_zero, prod_sign);
2321 
2322         if (flags & float_muladd_negate_c) {
2323             uc.h = -uc.h;
2324         }
2325         ur.h = up.h + uc.h;
2326     } else {
2327         union_float32 ua_orig = ua;
2328         union_float32 uc_orig = uc;
2329 
2330         if (flags & float_muladd_negate_product) {
2331             ua.h = -ua.h;
2332         }
2333         if (flags & float_muladd_negate_c) {
2334             uc.h = -uc.h;
2335         }
2336 
2337         ur.h = fmaf(ua.h, ub.h, uc.h);
2338 
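        /*
         * The host FMA may have overflowed to infinity or produced a
         * tiny result.  Magnitudes at or below FLT_MIN are redone in
         * softfloat, with the original (un-negated) inputs, so that
         * underflow and inexact are raised exactly as required.
         */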
2339         if (unlikely(f32_is_inf(ur))) {
2340             float_raise(float_flag_overflow, s);
2341         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2342             ua = ua_orig;
2343             uc = uc_orig;
2344             goto soft;
2345         }
2346     }
2347     if (flags & float_muladd_negate_result) {
2348         return float32_chs(ur.s);
2349     }
2350     return ur.s;
2351 
2352  soft:
2353     return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
2354 }
2355 
2356 float64 QEMU_FLATTEN
2357 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2358 {
2359     union_float64 ua, ub, uc, ur;
2360 
2361     ua.s = xa;
2362     ub.s = xb;
2363     uc.s = xc;
2364 
2365     if (unlikely(!can_use_fpu(s))) {
2366         goto soft;
2367     }
2368 
2369     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2370     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2371         goto soft;
2372     }
2373 
2374     if (unlikely(force_soft_fma)) {
2375         goto soft;
2376     }
2377 
2378     /*
2379      * When a == 0 or b == 0, there's no need to check for under/overflow,
2380      * since we know the addend is (normal || 0) and the product is 0.
2381      */
2382     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2383         union_float64 up;
2384         bool prod_sign;
2385 
2386         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2387         prod_sign ^= !!(flags & float_muladd_negate_product);
2388         up.s = float64_set_sign(float64_zero, prod_sign);
2389 
2390         if (flags & float_muladd_negate_c) {
2391             uc.h = -uc.h;
2392         }
2393         ur.h = up.h + uc.h;
2394     } else {
2395         union_float64 ua_orig = ua;
2396         union_float64 uc_orig = uc;
2397 
2398         if (flags & float_muladd_negate_product) {
2399             ua.h = -ua.h;
2400         }
2401         if (flags & float_muladd_negate_c) {
2402             uc.h = -uc.h;
2403         }
2404 
2405         ur.h = fma(ua.h, ub.h, uc.h);
2406 
2407         if (unlikely(f64_is_inf(ur))) {
2408             float_raise(float_flag_overflow, s);
2409         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2410             ua = ua_orig;
2411             uc = uc_orig;
2412             goto soft;
2413         }
2414     }
2415     if (flags & float_muladd_negate_result) {
2416         return float64_chs(ur.s);
2417     }
2418     return ur.s;
2419 
2420  soft:
2421     return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
2422 }
2423 
2424 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2425                           int flags, float_status *status)
2426 {
2427     FloatParts64 pa, pb, pc, *pr;
2428 
2429     float64_unpack_canonical(&pa, a, status);
2430     float64_unpack_canonical(&pb, b, status);
2431     float64_unpack_canonical(&pc, c, status);
2432     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2433 
2434     return float64r32_round_pack_canonical(pr, status);
2435 }
2436 
2437 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2438                                       int flags, float_status *status)
2439 {
2440     FloatParts64 pa, pb, pc, *pr;
2441 
2442     bfloat16_unpack_canonical(&pa, a, status);
2443     bfloat16_unpack_canonical(&pb, b, status);
2444     bfloat16_unpack_canonical(&pc, c, status);
2445     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2446 
2447     return bfloat16_round_pack_canonical(pr, status);
2448 }
2449 
2450 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2451                                       int flags, float_status *status)
2452 {
2453     FloatParts128 pa, pb, pc, *pr;
2454 
2455     float128_unpack_canonical(&pa, a, status);
2456     float128_unpack_canonical(&pb, b, status);
2457     float128_unpack_canonical(&pc, c, status);
2458     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2459 
2460     return float128_round_pack_canonical(pr, status);
2461 }
2462 
2463 /*
2464  * Division
2465  */
2466 
2467 float16 float16_div(float16 a, float16 b, float_status *status)
2468 {
2469     FloatParts64 pa, pb, *pr;
2470 
2471     float16_unpack_canonical(&pa, a, status);
2472     float16_unpack_canonical(&pb, b, status);
2473     pr = parts_div(&pa, &pb, status);
2474 
2475     return float16_round_pack_canonical(pr, status);
2476 }
2477 
2478 static float32 QEMU_SOFTFLOAT_ATTR
2479 soft_f32_div(float32 a, float32 b, float_status *status)
2480 {
2481     FloatParts64 pa, pb, *pr;
2482 
2483     float32_unpack_canonical(&pa, a, status);
2484     float32_unpack_canonical(&pb, b, status);
2485     pr = parts_div(&pa, &pb, status);
2486 
2487     return float32_round_pack_canonical(pr, status);
2488 }
2489 
2490 static float64 QEMU_SOFTFLOAT_ATTR
2491 soft_f64_div(float64 a, float64 b, float_status *status)
2492 {
2493     FloatParts64 pa, pb, *pr;
2494 
2495     float64_unpack_canonical(&pa, a, status);
2496     float64_unpack_canonical(&pb, b, status);
2497     pr = parts_div(&pa, &pb, status);
2498 
2499     return float64_round_pack_canonical(pr, status);
2500 }
2501 
2502 static float hard_f32_div(float a, float b)
2503 {
2504     return a / b;
2505 }
2506 
2507 static double hard_f64_div(double a, double b)
2508 {
2509     return a / b;
2510 }
2511 
2512 static bool f32_div_pre(union_float32 a, union_float32 b)
2513 {
2514     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2515         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2516                fpclassify(b.h) == FP_NORMAL;
2517     }
2518     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2519 }
2520 
2521 static bool f64_div_pre(union_float64 a, union_float64 b)
2522 {
2523     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2524         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2525                fpclassify(b.h) == FP_NORMAL;
2526     }
2527     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2528 }
2529 
2530 static bool f32_div_post(union_float32 a, union_float32 b)
2531 {
2532     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2533         return fpclassify(a.h) != FP_ZERO;
2534     }
2535     return !float32_is_zero(a.s);
2536 }
2537 
2538 static bool f64_div_post(union_float64 a, union_float64 b)
2539 {
2540     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2541         return fpclassify(a.h) != FP_ZERO;
2542     }
2543     return !float64_is_zero(a.s);
2544 }
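/*
 * For division, the "pre" predicate admits only a zero-or-normal
 * dividend and a strictly normal divisor, so 0/0, x/0 and subnormal
 * operands all take the softfloat path.  The "post" predicate runs
 * when the host quotient is tiny: a zero dividend makes that zero
 * exact, while anything else may have underflowed and is recomputed
 * in softfloat for correct flags.
 */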
2545 
2546 float32 QEMU_FLATTEN
2547 float32_div(float32 a, float32 b, float_status *s)
2548 {
2549     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2550                         f32_div_pre, f32_div_post);
2551 }
2552 
2553 float64 QEMU_FLATTEN
2554 float64_div(float64 a, float64 b, float_status *s)
2555 {
2556     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2557                         f64_div_pre, f64_div_post);
2558 }
2559 
2560 float64 float64r32_div(float64 a, float64 b, float_status *status)
2561 {
2562     FloatParts64 pa, pb, *pr;
2563 
2564     float64_unpack_canonical(&pa, a, status);
2565     float64_unpack_canonical(&pb, b, status);
2566     pr = parts_div(&pa, &pb, status);
2567 
2568     return float64r32_round_pack_canonical(pr, status);
2569 }
2570 
2571 bfloat16 QEMU_FLATTEN
2572 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2573 {
2574     FloatParts64 pa, pb, *pr;
2575 
2576     bfloat16_unpack_canonical(&pa, a, status);
2577     bfloat16_unpack_canonical(&pb, b, status);
2578     pr = parts_div(&pa, &pb, status);
2579 
2580     return bfloat16_round_pack_canonical(pr, status);
2581 }
2582 
2583 float128 QEMU_FLATTEN
2584 float128_div(float128 a, float128 b, float_status *status)
2585 {
2586     FloatParts128 pa, pb, *pr;
2587 
2588     float128_unpack_canonical(&pa, a, status);
2589     float128_unpack_canonical(&pb, b, status);
2590     pr = parts_div(&pa, &pb, status);
2591 
2592     return float128_round_pack_canonical(pr, status);
2593 }
2594 
2595 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2596 {
2597     FloatParts128 pa, pb, *pr;
2598 
2599     if (!floatx80_unpack_canonical(&pa, a, status) ||
2600         !floatx80_unpack_canonical(&pb, b, status)) {
2601         return floatx80_default_nan(status);
2602     }
2603 
2604     pr = parts_div(&pa, &pb, status);
2605     return floatx80_round_pack_canonical(pr, status);
2606 }
2607 
2608 /*
2609  * Remainder
2610  */
2611 
2612 float32 float32_rem(float32 a, float32 b, float_status *status)
2613 {
2614     FloatParts64 pa, pb, *pr;
2615 
2616     float32_unpack_canonical(&pa, a, status);
2617     float32_unpack_canonical(&pb, b, status);
2618     pr = parts_modrem(&pa, &pb, NULL, status);
2619 
2620     return float32_round_pack_canonical(pr, status);
2621 }
2622 
2623 float64 float64_rem(float64 a, float64 b, float_status *status)
2624 {
2625     FloatParts64 pa, pb, *pr;
2626 
2627     float64_unpack_canonical(&pa, a, status);
2628     float64_unpack_canonical(&pb, b, status);
2629     pr = parts_modrem(&pa, &pb, NULL, status);
2630 
2631     return float64_round_pack_canonical(pr, status);
2632 }
2633 
2634 float128 float128_rem(float128 a, float128 b, float_status *status)
2635 {
2636     FloatParts128 pa, pb, *pr;
2637 
2638     float128_unpack_canonical(&pa, a, status);
2639     float128_unpack_canonical(&pb, b, status);
2640     pr = parts_modrem(&pa, &pb, NULL, status);
2641 
2642     return float128_round_pack_canonical(pr, status);
2643 }
2644 
2645 /*
2646  * Returns the remainder of the extended double-precision floating-point value
2647  * `a' with respect to the corresponding value `b'.
2648  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2649  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, the
2650  * remainder is computed by truncating the quotient toward zero instead,
2651  * and *quotient is set to the low 64 bits of the absolute value of the
2652  * integer quotient.
2653  */
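/*
 * For example, with a = 5.0 and b = 3.0: the IEEE remainder rounds the
 * quotient to the nearest integer (2) and returns -1.0, while the
 * truncating 'mod' form uses quotient 1 and returns 2.0 with
 * *quotient = 1.
 */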
2654 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2655                          uint64_t *quotient, float_status *status)
2656 {
2657     FloatParts128 pa, pb, *pr;
2658 
2659     *quotient = 0;
2660     if (!floatx80_unpack_canonical(&pa, a, status) ||
2661         !floatx80_unpack_canonical(&pb, b, status)) {
2662         return floatx80_default_nan(status);
2663     }
2664     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2665 
2666     return floatx80_round_pack_canonical(pr, status);
2667 }
2668 
2669 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2670 {
2671     uint64_t quotient;
2672     return floatx80_modrem(a, b, false, &quotient, status);
2673 }
2674 
2675 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2676 {
2677     uint64_t quotient;
2678     return floatx80_modrem(a, b, true, &quotient, status);
2679 }
2680 
2681 /*
2682  * Float to Float conversions
2683  *
2684  * Returns the result of converting one float format to another. The
2685  * conversion is performed according to the IEC/IEEE Standard for
2686  * Binary Floating-Point Arithmetic.
2687  *
2688  * Usually this only needs to take care of raising invalid exceptions
2689  * and of converting NaNs from one format to the other.
2690  */
2691 
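/*
 * Convert to ARM's alternative half-precision (AHP) format, which has
 * no encodings for Inf or NaN; the top exponent is used for normal
 * numbers instead.
 */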
2692 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2693 {
2694     switch (a->cls) {
2695     case float_class_snan:
2696         float_raise(float_flag_invalid_snan, s);
2697         /* fall through */
2698     case float_class_qnan:
2699         /*
2700          * There is no NaN in the destination format.  Raise Invalid
2701          * and return a zero with the sign of the input NaN.
2702          */
2703         float_raise(float_flag_invalid, s);
2704         a->cls = float_class_zero;
2705         break;
2706 
2707     case float_class_inf:
2708         /*
2709          * There is no Inf in the destination format.  Raise Invalid
2710          * and return the maximum normal with the correct sign.
2711          */
2712         float_raise(float_flag_invalid, s);
2713         a->cls = float_class_normal;
2714         a->exp = float16_params_ahp.exp_max;
2715         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2716                                   float16_params_ahp.frac_size + 1);
2717         break;
2718 
2719     case float_class_denormal:
2720         float_raise(float_flag_input_denormal_used, s);
2721         break;
2722     case float_class_normal:
2723     case float_class_zero:
2724         break;
2725 
2726     default:
2727         g_assert_not_reached();
2728     }
2729 }
2730 
2731 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2732 {
2733     if (is_nan(a->cls)) {
2734         parts_return_nan(a, s);
2735     }
2736     if (a->cls == float_class_denormal) {
2737         float_raise(float_flag_input_denormal_used, s);
2738     }
2739 }
2740 
2741 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2742 {
2743     if (is_nan(a->cls)) {
2744         parts_return_nan(a, s);
2745     }
2746     if (a->cls == float_class_denormal) {
2747         float_raise(float_flag_input_denormal_used, s);
2748     }
2749 }
2750 
2751 #define parts_float_to_float(P, S) \
2752     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2753 
2754 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2755                                         float_status *s)
2756 {
2757     a->cls = b->cls;
2758     a->sign = b->sign;
2759     a->exp = b->exp;
2760 
2761     switch (a->cls) {
2762     case float_class_denormal:
2763         float_raise(float_flag_input_denormal_used, s);
2764         /* fall through */
2765     case float_class_normal:
2766         frac_truncjam(a, b);
2767         break;
2768     case float_class_snan:
2769     case float_class_qnan:
2770         /* Discard the low bits of the NaN. */
2771         a->frac = b->frac_hi;
2772         parts_return_nan(a, s);
2773         break;
2774     default:
2775         break;
2776     }
2777 }
2778 
2779 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2780                                        float_status *s)
2781 {
2782     a->cls = b->cls;
2783     a->sign = b->sign;
2784     a->exp = b->exp;
2785     frac_widen(a, b);
2786 
2787     if (is_nan(a->cls)) {
2788         parts_return_nan(a, s);
2789     }
2790     if (a->cls == float_class_denormal) {
2791         float_raise(float_flag_input_denormal_used, s);
2792     }
2793 }
2794 
2795 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2796 {
2797     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2798     FloatParts64 p;
2799 
2800     float16a_unpack_canonical(&p, a, s, fmt16);
2801     parts_float_to_float(&p, s);
2802     return float32_round_pack_canonical(&p, s);
2803 }
2804 
2805 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2806 {
2807     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2808     FloatParts64 p;
2809 
2810     float16a_unpack_canonical(&p, a, s, fmt16);
2811     parts_float_to_float(&p, s);
2812     return float64_round_pack_canonical(&p, s);
2813 }
2814 
2815 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2816 {
2817     FloatParts64 p;
2818     const FloatFmt *fmt;
2819 
2820     float32_unpack_canonical(&p, a, s);
2821     if (ieee) {
2822         parts_float_to_float(&p, s);
2823         fmt = &float16_params;
2824     } else {
2825         parts_float_to_ahp(&p, s);
2826         fmt = &float16_params_ahp;
2827     }
2828     return float16a_round_pack_canonical(&p, s, fmt);
2829 }
2830 
2831 static float64 QEMU_SOFTFLOAT_ATTR
2832 soft_float32_to_float64(float32 a, float_status *s)
2833 {
2834     FloatParts64 p;
2835 
2836     float32_unpack_canonical(&p, a, s);
2837     parts_float_to_float(&p, s);
2838     return float64_round_pack_canonical(&p, s);
2839 }
2840 
2841 float64 float32_to_float64(float32 a, float_status *s)
2842 {
2843     if (likely(float32_is_normal(a))) {
2844         /* Widening conversion can never produce inexact results.  */
2845         union_float32 uf;
2846         union_float64 ud;
2847         uf.s = a;
2848         ud.h = uf.h;
2849         return ud.s;
2850     } else if (float32_is_zero(a)) {
2851         return float64_set_sign(float64_zero, float32_is_neg(a));
2852     } else {
2853         return soft_float32_to_float64(a, s);
2854     }
2855 }
2856 
2857 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2858 {
2859     FloatParts64 p;
2860     const FloatFmt *fmt;
2861 
2862     float64_unpack_canonical(&p, a, s);
2863     if (ieee) {
2864         parts_float_to_float(&p, s);
2865         fmt = &float16_params;
2866     } else {
2867         parts_float_to_ahp(&p, s);
2868         fmt = &float16_params_ahp;
2869     }
2870     return float16a_round_pack_canonical(&p, s, fmt);
2871 }
2872 
2873 float32 float64_to_float32(float64 a, float_status *s)
2874 {
2875     FloatParts64 p;
2876 
2877     float64_unpack_canonical(&p, a, s);
2878     parts_float_to_float(&p, s);
2879     return float32_round_pack_canonical(&p, s);
2880 }
2881 
2882 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2883 {
2884     FloatParts64 p;
2885 
2886     bfloat16_unpack_canonical(&p, a, s);
2887     parts_float_to_float(&p, s);
2888     return float32_round_pack_canonical(&p, s);
2889 }
2890 
2891 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2892 {
2893     FloatParts64 p;
2894 
2895     bfloat16_unpack_canonical(&p, a, s);
2896     parts_float_to_float(&p, s);
2897     return float64_round_pack_canonical(&p, s);
2898 }
2899 
2900 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2901 {
2902     FloatParts64 p;
2903 
2904     float32_unpack_canonical(&p, a, s);
2905     parts_float_to_float(&p, s);
2906     return bfloat16_round_pack_canonical(&p, s);
2907 }
2908 
2909 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2910 {
2911     FloatParts64 p;
2912 
2913     float64_unpack_canonical(&p, a, s);
2914     parts_float_to_float(&p, s);
2915     return bfloat16_round_pack_canonical(&p, s);
2916 }
2917 
2918 float32 float128_to_float32(float128 a, float_status *s)
2919 {
2920     FloatParts64 p64;
2921     FloatParts128 p128;
2922 
2923     float128_unpack_canonical(&p128, a, s);
2924     parts_float_to_float_narrow(&p64, &p128, s);
2925     return float32_round_pack_canonical(&p64, s);
2926 }
2927 
2928 float64 float128_to_float64(float128 a, float_status *s)
2929 {
2930     FloatParts64 p64;
2931     FloatParts128 p128;
2932 
2933     float128_unpack_canonical(&p128, a, s);
2934     parts_float_to_float_narrow(&p64, &p128, s);
2935     return float64_round_pack_canonical(&p64, s);
2936 }
2937 
2938 float128 float32_to_float128(float32 a, float_status *s)
2939 {
2940     FloatParts64 p64;
2941     FloatParts128 p128;
2942 
2943     float32_unpack_canonical(&p64, a, s);
2944     parts_float_to_float_widen(&p128, &p64, s);
2945     return float128_round_pack_canonical(&p128, s);
2946 }
2947 
2948 float128 float64_to_float128(float64 a, float_status *s)
2949 {
2950     FloatParts64 p64;
2951     FloatParts128 p128;
2952 
2953     float64_unpack_canonical(&p64, a, s);
2954     parts_float_to_float_widen(&p128, &p64, s);
2955     return float128_round_pack_canonical(&p128, s);
2956 }
2957 
2958 float32 floatx80_to_float32(floatx80 a, float_status *s)
2959 {
2960     FloatParts64 p64;
2961     FloatParts128 p128;
2962 
2963     if (floatx80_unpack_canonical(&p128, a, s)) {
2964         parts_float_to_float_narrow(&p64, &p128, s);
2965     } else {
2966         parts_default_nan(&p64, s);
2967     }
2968     return float32_round_pack_canonical(&p64, s);
2969 }
2970 
2971 float64 floatx80_to_float64(floatx80 a, float_status *s)
2972 {
2973     FloatParts64 p64;
2974     FloatParts128 p128;
2975 
2976     if (floatx80_unpack_canonical(&p128, a, s)) {
2977         parts_float_to_float_narrow(&p64, &p128, s);
2978     } else {
2979         parts_default_nan(&p64, s);
2980     }
2981     return float64_round_pack_canonical(&p64, s);
2982 }
2983 
2984 float128 floatx80_to_float128(floatx80 a, float_status *s)
2985 {
2986     FloatParts128 p;
2987 
2988     if (floatx80_unpack_canonical(&p, a, s)) {
2989         parts_float_to_float(&p, s);
2990     } else {
2991         parts_default_nan(&p, s);
2992     }
2993     return float128_round_pack_canonical(&p, s);
2994 }
2995 
2996 floatx80 float32_to_floatx80(float32 a, float_status *s)
2997 {
2998     FloatParts64 p64;
2999     FloatParts128 p128;
3000 
3001     float32_unpack_canonical(&p64, a, s);
3002     parts_float_to_float_widen(&p128, &p64, s);
3003     return floatx80_round_pack_canonical(&p128, s);
3004 }
3005 
3006 floatx80 float64_to_floatx80(float64 a, float_status *s)
3007 {
3008     FloatParts64 p64;
3009     FloatParts128 p128;
3010 
3011     float64_unpack_canonical(&p64, a, s);
3012     parts_float_to_float_widen(&p128, &p64, s);
3013     return floatx80_round_pack_canonical(&p128, s);
3014 }
3015 
3016 floatx80 float128_to_floatx80(float128 a, float_status *s)
3017 {
3018     FloatParts128 p;
3019 
3020     float128_unpack_canonical(&p, a, s);
3021     parts_float_to_float(&p, s);
3022     return floatx80_round_pack_canonical(&p, s);
3023 }
3024 
3025 /*
3026  * Round to integral value
3027  */
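/*
 * Each of these rounds to an integral value in the same floating-point
 * format, honouring the rounding mode in *s; the zero passed to
 * parts_round_to_int is the scale, i.e. no exponent adjustment.
 */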
3028 
3029 float16 float16_round_to_int(float16 a, float_status *s)
3030 {
3031     FloatParts64 p;
3032 
3033     float16_unpack_canonical(&p, a, s);
3034     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
3035     return float16_round_pack_canonical(&p, s);
3036 }
3037 
3038 float32 float32_round_to_int(float32 a, float_status *s)
3039 {
3040     FloatParts64 p;
3041 
3042     float32_unpack_canonical(&p, a, s);
3043     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
3044     return float32_round_pack_canonical(&p, s);
3045 }
3046 
3047 float64 float64_round_to_int(float64 a, float_status *s)
3048 {
3049     FloatParts64 p;
3050 
3051     float64_unpack_canonical(&p, a, s);
3052     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
3053     return float64_round_pack_canonical(&p, s);
3054 }
3055 
3056 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
3057 {
3058     FloatParts64 p;
3059 
3060     bfloat16_unpack_canonical(&p, a, s);
3061     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
3062     return bfloat16_round_pack_canonical(&p, s);
3063 }
3064 
3065 float128 float128_round_to_int(float128 a, float_status *s)
3066 {
3067     FloatParts128 p;
3068 
3069     float128_unpack_canonical(&p, a, s);
3070     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3071     return float128_round_pack_canonical(&p, s);
3072 }
3073 
3074 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3075 {
3076     FloatParts128 p;
3077 
3078     if (!floatx80_unpack_canonical(&p, a, status)) {
3079         return floatx80_default_nan(status);
3080     }
3081 
3082     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3083                        &floatx80_params[status->floatx80_rounding_precision]);
3084     return floatx80_round_pack_canonical(&p, status);
3085 }
3086 
3087 /*
3088  * Floating-point to signed integer conversions
3089  */
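/*
 * The *_scalbn helpers convert a * 2^scale to a signed integer with
 * the given rounding mode; out-of-range values and NaNs raise the
 * invalid flag and produce a result saturated to the supplied
 * min/max bounds.
 */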
3090 
3091 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3092                               float_status *s)
3093 {
3094     FloatParts64 p;
3095 
3096     float16_unpack_canonical(&p, a, s);
3097     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3098 }
3099 
3100 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3101                                 float_status *s)
3102 {
3103     FloatParts64 p;
3104 
3105     float16_unpack_canonical(&p, a, s);
3106     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3107 }
3108 
3109 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3110                                 float_status *s)
3111 {
3112     FloatParts64 p;
3113 
3114     float16_unpack_canonical(&p, a, s);
3115     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3116 }
3117 
3118 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3119                                 float_status *s)
3120 {
3121     FloatParts64 p;
3122 
3123     float16_unpack_canonical(&p, a, s);
3124     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3125 }
3126 
3127 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3128                                 float_status *s)
3129 {
3130     FloatParts64 p;
3131 
3132     float32_unpack_canonical(&p, a, s);
3133     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3134 }
3135 
3136 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3137                                 float_status *s)
3138 {
3139     FloatParts64 p;
3140 
3141     float32_unpack_canonical(&p, a, s);
3142     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3143 }
3144 
3145 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3146                                 float_status *s)
3147 {
3148     FloatParts64 p;
3149 
3150     float32_unpack_canonical(&p, a, s);
3151     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3152 }
3153 
3154 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3155                                 float_status *s)
3156 {
3157     FloatParts64 p;
3158 
3159     float64_unpack_canonical(&p, a, s);
3160     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3161 }
3162 
3163 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3164                                 float_status *s)
3165 {
3166     FloatParts64 p;
3167 
3168     float64_unpack_canonical(&p, a, s);
3169     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3170 }
3171 
3172 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3173                                 float_status *s)
3174 {
3175     FloatParts64 p;
3176 
3177     float64_unpack_canonical(&p, a, s);
3178     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3179 }
3180 
3181 int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3182                                float_status *s)
3183 {
3184     FloatParts64 p;
3185 
3186     bfloat16_unpack_canonical(&p, a, s);
3187     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3188 }
3189 
3190 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3191                                  float_status *s)
3192 {
3193     FloatParts64 p;
3194 
3195     bfloat16_unpack_canonical(&p, a, s);
3196     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3197 }
3198 
3199 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3200                                  float_status *s)
3201 {
3202     FloatParts64 p;
3203 
3204     bfloat16_unpack_canonical(&p, a, s);
3205     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3206 }
3207 
3208 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3209                                  float_status *s)
3210 {
3211     FloatParts64 p;
3212 
3213     bfloat16_unpack_canonical(&p, a, s);
3214     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3215 }
3216 
3217 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3218                                         int scale, float_status *s)
3219 {
3220     FloatParts128 p;
3221 
3222     float128_unpack_canonical(&p, a, s);
3223     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3224 }
3225 
3226 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3227                                         int scale, float_status *s)
3228 {
3229     FloatParts128 p;
3230 
3231     float128_unpack_canonical(&p, a, s);
3232     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3233 }
3234 
3235 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3236                                         int scale, float_status *s)
3237 {
3238     int flags = 0;
3239     Int128 r;
3240     FloatParts128 p;
3241 
3242     float128_unpack_canonical(&p, a, s);
3243 
3244     switch (p.cls) {
3245     case float_class_snan:
3246         flags |= float_flag_invalid_snan;
3247         /* fall through */
3248     case float_class_qnan:
3249         flags |= float_flag_invalid;
3250         r = UINT128_MAX;
3251         break;
3252 
3253     case float_class_inf:
3254         flags = float_flag_invalid | float_flag_invalid_cvti;
3255         r = p.sign ? INT128_MIN : INT128_MAX;
3256         break;
3257 
3258     case float_class_zero:
3259         return int128_zero();
3260 
3261     case float_class_normal:
3262     case float_class_denormal:
3263         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3264             flags = float_flag_inexact;
3265         }
3266 
3267         if (p.exp < 127) {
3268             int shift = 127 - p.exp;
3269             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3270             if (p.sign) {
3271                 r = int128_neg(r);
3272             }
3273         } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3274                    p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
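            /*
             * -2^127 is the one value of magnitude 2^127 that fits:
             * exponent 127 with only the implicit bit set, negative.
             */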
3275             r = INT128_MIN;
3276         } else {
3277             flags = float_flag_invalid | float_flag_invalid_cvti;
3278             r = p.sign ? INT128_MIN : INT128_MAX;
3279         }
3280         break;
3281 
3282     default:
3283         g_assert_not_reached();
3284     }
3285 
3286     float_raise(flags, s);
3287     return r;
3288 }
3289 
3290 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3291                                         int scale, float_status *s)
3292 {
3293     FloatParts128 p;
3294 
3295     if (!floatx80_unpack_canonical(&p, a, s)) {
3296         parts_default_nan(&p, s);
3297     }
3298     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3299 }
3300 
3301 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3302                                         int scale, float_status *s)
3303 {
3304     FloatParts128 p;
3305 
3306     if (!floatx80_unpack_canonical(&p, a, s)) {
3307         parts_default_nan(&p, s);
3308     }
3309     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3310 }
3311 
3312 int8_t float16_to_int8(float16 a, float_status *s)
3313 {
3314     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3315 }
3316 
3317 int16_t float16_to_int16(float16 a, float_status *s)
3318 {
3319     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3320 }
3321 
3322 int32_t float16_to_int32(float16 a, float_status *s)
3323 {
3324     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3325 }
3326 
3327 int64_t float16_to_int64(float16 a, float_status *s)
3328 {
3329     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3330 }
3331 
3332 int16_t float32_to_int16(float32 a, float_status *s)
3333 {
3334     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3335 }
3336 
3337 int32_t float32_to_int32(float32 a, float_status *s)
3338 {
3339     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3340 }
3341 
3342 int64_t float32_to_int64(float32 a, float_status *s)
3343 {
3344     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3345 }
3346 
3347 int16_t float64_to_int16(float64 a, float_status *s)
3348 {
3349     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3350 }
3351 
3352 int32_t float64_to_int32(float64 a, float_status *s)
3353 {
3354     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3355 }
3356 
3357 int64_t float64_to_int64(float64 a, float_status *s)
3358 {
3359     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3360 }
3361 
3362 int32_t float128_to_int32(float128 a, float_status *s)
3363 {
3364     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3365 }
3366 
3367 int64_t float128_to_int64(float128 a, float_status *s)
3368 {
3369     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3370 }
3371 
3372 Int128 float128_to_int128(float128 a, float_status *s)
3373 {
3374     return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3375 }
3376 
3377 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3378 {
3379     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3380 }
3381 
3382 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3383 {
3384     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3385 }
3386 
3387 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3388 {
3389     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3390 }
3391 
3392 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3393 {
3394     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3395 }
3396 
3397 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3398 {
3399     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3400 }
3401 
3402 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3403 {
3404     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3405 }
3406 
3407 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3408 {
3409     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3410 }
3411 
3412 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3413 {
3414     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3415 }
3416 
3417 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3418 {
3419     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3420 }
3421 
3422 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3423 {
3424     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3425 }
3426 
3427 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3428 {
3429     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3430 }
3431 
3432 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3433 {
3434     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3435 }
3436 
3437 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3438 {
3439     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3440 }
3441 
3442 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3443 {
3444     return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3445 }
3446 
3447 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3448 {
3449     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3450 }
3451 
3452 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3453 {
3454     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3455 }
3456 
3457 int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
3458 {
3459     return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3460 }
3461 
3462 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3463 {
3464     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3465 }
3466 
3467 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3468 {
3469     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3470 }
3471 
3472 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3473 {
3474     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3475 }
3476 
3477 int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
3478 {
3479     return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
3480 }
3481 
3482 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3483 {
3484     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3485 }
3486 
3487 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3488 {
3489     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3490 }
3491 
3492 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3493 {
3494     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3495 }
3496 
3497 int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
3498                                 float_status *s)
3499 {
3500     FloatParts64 p;
3501 
3502     float64_unpack_canonical(&p, a, s);
3503     return parts_float_to_sint_modulo(&p, rmode, 31, s);
3504 }
3505 
3506 int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
3507                                 float_status *s)
3508 {
3509     FloatParts64 p;
3510 
3511     float64_unpack_canonical(&p, a, s);
3512     return parts_float_to_sint_modulo(&p, rmode, 63, s);
3513 }
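
/*
 * Editorial note: unlike the saturating conversions above, the
 * "modulo" variants return the rounded unbounded-precision two's
 * complement result reduced modulo 2**32 or 2**64 -- the wrap-around
 * behaviour some ISAs specify instead of clamping on overflow.
 * A hypothetical truncating wrapper might look like:
 */
static inline int32_t example_float64_to_int32_wrap(float64 a, float_status *s)
{
    /* Hypothetical helper: truncate toward zero, wrap on overflow. */
    return float64_to_int32_modulo(a, float_round_to_zero, s);
}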
3514 
3515 /*
3516  * Floating-point to unsigned integer conversions
3517  */
3518 
3519 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3520                                 float_status *s)
3521 {
3522     FloatParts64 p;
3523 
3524     float16_unpack_canonical(&p, a, s);
3525     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3526 }
3527 
3528 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3529                                   float_status *s)
3530 {
3531     FloatParts64 p;
3532 
3533     float16_unpack_canonical(&p, a, s);
3534     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3535 }
3536 
3537 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3538                                   float_status *s)
3539 {
3540     FloatParts64 p;
3541 
3542     float16_unpack_canonical(&p, a, s);
3543     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3544 }
3545 
3546 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3547                                   float_status *s)
3548 {
3549     FloatParts64 p;
3550 
3551     float16_unpack_canonical(&p, a, s);
3552     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3553 }
3554 
3555 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3556                                   float_status *s)
3557 {
3558     FloatParts64 p;
3559 
3560     float32_unpack_canonical(&p, a, s);
3561     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3562 }
3563 
3564 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3565                                   float_status *s)
3566 {
3567     FloatParts64 p;
3568 
3569     float32_unpack_canonical(&p, a, s);
3570     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3571 }
3572 
3573 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3574                                   float_status *s)
3575 {
3576     FloatParts64 p;
3577 
3578     float32_unpack_canonical(&p, a, s);
3579     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3580 }
3581 
3582 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3583                                   float_status *s)
3584 {
3585     FloatParts64 p;
3586 
3587     float64_unpack_canonical(&p, a, s);
3588     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3589 }
3590 
3591 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3592                                   float_status *s)
3593 {
3594     FloatParts64 p;
3595 
3596     float64_unpack_canonical(&p, a, s);
3597     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3598 }
3599 
3600 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3601                                   float_status *s)
3602 {
3603     FloatParts64 p;
3604 
3605     float64_unpack_canonical(&p, a, s);
3606     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3607 }
3608 
3609 uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
3610                                  int scale, float_status *s)
3611 {
3612     FloatParts64 p;
3613 
3614     bfloat16_unpack_canonical(&p, a, s);
3615     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3616 }
3617 
3618 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3619                                    int scale, float_status *s)
3620 {
3621     FloatParts64 p;
3622 
3623     bfloat16_unpack_canonical(&p, a, s);
3624     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3625 }
3626 
3627 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3628                                    int scale, float_status *s)
3629 {
3630     FloatParts64 p;
3631 
3632     bfloat16_unpack_canonical(&p, a, s);
3633     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3634 }
3635 
3636 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3637                                    int scale, float_status *s)
3638 {
3639     FloatParts64 p;
3640 
3641     bfloat16_unpack_canonical(&p, a, s);
3642     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3643 }
3644 
3645 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3646                                           int scale, float_status *s)
3647 {
3648     FloatParts128 p;
3649 
3650     float128_unpack_canonical(&p, a, s);
3651     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3652 }
3653 
3654 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3655                                           int scale, float_status *s)
3656 {
3657     FloatParts128 p;
3658 
3659     float128_unpack_canonical(&p, a, s);
3660     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3661 }
3662 
3663 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3664                                          int scale, float_status *s)
3665 {
3666     int flags = 0;
3667     Int128 r;
3668     FloatParts128 p;
3669 
3670     float128_unpack_canonical(&p, a, s);
3671 
3672     switch (p.cls) {
3673     case float_class_snan:
3674         flags |= float_flag_invalid_snan;
3675         /* fall through */
3676     case float_class_qnan:
3677         flags |= float_flag_invalid;
3678         r = UINT128_MAX;
3679         break;
3680 
3681     case float_class_inf:
3682         flags = float_flag_invalid | float_flag_invalid_cvti;
3683         r = p.sign ? int128_zero() : UINT128_MAX;
3684         break;
3685 
3686     case float_class_zero:
3687         return int128_zero();
3688 
3689     case float_class_normal:
3690     case float_class_denormal:
3691         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3692             flags = float_flag_inexact;
3693             if (p.cls == float_class_zero) {
3694                 r = int128_zero();
3695                 break;
3696             }
3697         }
3698 
3699         if (p.sign) {
3700             flags = float_flag_invalid | float_flag_invalid_cvti;
3701             r = int128_zero();
3702         } else if (p.exp <= 127) {
3703             int shift = 127 - p.exp;
3704             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3705         } else {
3706             flags = float_flag_invalid | float_flag_invalid_cvti;
3707             r = UINT128_MAX;
3708         }
3709         break;
3710 
3711     default:
3712         g_assert_not_reached();
3713     }
3714 
3715     float_raise(flags, s);
3716     return r;
3717 }
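
/*
 * Worked example (editorial comment): the p.cls == float_class_zero
 * re-check after rounding is what lets a small negative input such as
 * -0.25 convert cleanly under round-to-nearest: rounding collapses it
 * to zero, so the function returns 0 with only float_flag_inexact and
 * never reaches the p.sign test that raises float_flag_invalid for
 * negative values.
 */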
3718 
3719 uint8_t float16_to_uint8(float16 a, float_status *s)
3720 {
3721     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3722 }
3723 
3724 uint16_t float16_to_uint16(float16 a, float_status *s)
3725 {
3726     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3727 }
3728 
3729 uint32_t float16_to_uint32(float16 a, float_status *s)
3730 {
3731     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3732 }
3733 
3734 uint64_t float16_to_uint64(float16 a, float_status *s)
3735 {
3736     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3737 }
3738 
3739 uint16_t float32_to_uint16(float32 a, float_status *s)
3740 {
3741     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3742 }
3743 
3744 uint32_t float32_to_uint32(float32 a, float_status *s)
3745 {
3746     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3747 }
3748 
3749 uint64_t float32_to_uint64(float32 a, float_status *s)
3750 {
3751     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3752 }
3753 
3754 uint16_t float64_to_uint16(float64 a, float_status *s)
3755 {
3756     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3757 }
3758 
3759 uint32_t float64_to_uint32(float64 a, float_status *s)
3760 {
3761     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3762 }
3763 
3764 uint64_t float64_to_uint64(float64 a, float_status *s)
3765 {
3766     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3767 }
3768 
3769 uint32_t float128_to_uint32(float128 a, float_status *s)
3770 {
3771     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3772 }
3773 
3774 uint64_t float128_to_uint64(float128 a, float_status *s)
3775 {
3776     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3777 }
3778 
3779 Int128 float128_to_uint128(float128 a, float_status *s)
3780 {
3781     return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3782 }
3783 
3784 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3785 {
3786     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3787 }
3788 
3789 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3790 {
3791     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3792 }
3793 
3794 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3795 {
3796     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3797 }
3798 
3799 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3800 {
3801     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3802 }
3803 
3804 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3805 {
3806     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3807 }
3808 
3809 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3810 {
3811     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3812 }
3813 
3814 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3815 {
3816     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3817 }
3818 
3819 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3820 {
3821     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3822 }
3823 
3824 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3825 {
3826     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3827 }
3828 
3829 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3830 {
3831     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3832 }
3833 
3834 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3835 {
3836     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3837 }
3838 
3839 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3840 {
3841     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3842 }
3843 
3844 uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
3845 {
3846     return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3847 }
3848 
3849 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3850 {
3851     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3852 }
3853 
3854 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3855 {
3856     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3857 }
3858 
3859 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3860 {
3861     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3862 }
3863 
3864 uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
3865 {
3866     return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
3867 }
3868 
3869 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3870 {
3871     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3872 }
3873 
3874 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3875 {
3876     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3877 }
3878 
3879 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3880 {
3881     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3882 }
3883 
3884 /*
3885  * Signed integer to floating-point conversions
3886  */
3887 
3888 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3889 {
3890     FloatParts64 p;
3891 
3892     parts_sint_to_float(&p, a, scale, status);
3893     return float16_round_pack_canonical(&p, status);
3894 }
3895 
3896 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3897 {
3898     return int64_to_float16_scalbn(a, scale, status);
3899 }
3900 
3901 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3902 {
3903     return int64_to_float16_scalbn(a, scale, status);
3904 }
3905 
3906 float16 int64_to_float16(int64_t a, float_status *status)
3907 {
3908     return int64_to_float16_scalbn(a, 0, status);
3909 }
3910 
3911 float16 int32_to_float16(int32_t a, float_status *status)
3912 {
3913     return int64_to_float16_scalbn(a, 0, status);
3914 }
3915 
3916 float16 int16_to_float16(int16_t a, float_status *status)
3917 {
3918     return int64_to_float16_scalbn(a, 0, status);
3919 }
3920 
3921 float16 int8_to_float16(int8_t a, float_status *status)
3922 {
3923     return int64_to_float16_scalbn(a, 0, status);
3924 }
3925 
3926 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3927 {
3928     FloatParts64 p;
3929 
3930     /* Without scaling, there are no overflow concerns. */
3931     if (likely(scale == 0) && can_use_fpu(status)) {
3932         union_float32 ur;
3933         ur.h = a;
3934         return ur.s;
3935     }
3936 
3937     parts64_sint_to_float(&p, a, scale, status);
3938     return float32_round_pack_canonical(&p, status);
3939 }
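
/*
 * Editorial note on the fast path above: union_float32.h is the host
 * "float" member, so "ur.h = a" compiles to the native int64-to-float
 * conversion.  can_use_fpu() only returns true when float_flag_inexact
 * is already accumulated and the rounding mode is nearest-even, which
 * is precisely when the host's default conversion is indistinguishable
 * from the softfloat one (int64-to-float32 can be inexact but can
 * never overflow when scale == 0).
 */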
3940 
3941 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3942 {
3943     return int64_to_float32_scalbn(a, scale, status);
3944 }
3945 
3946 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3947 {
3948     return int64_to_float32_scalbn(a, scale, status);
3949 }
3950 
3951 float32 int64_to_float32(int64_t a, float_status *status)
3952 {
3953     return int64_to_float32_scalbn(a, 0, status);
3954 }
3955 
3956 float32 int32_to_float32(int32_t a, float_status *status)
3957 {
3958     return int64_to_float32_scalbn(a, 0, status);
3959 }
3960 
3961 float32 int16_to_float32(int16_t a, float_status *status)
3962 {
3963     return int64_to_float32_scalbn(a, 0, status);
3964 }
3965 
3966 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3967 {
3968     FloatParts64 p;
3969 
3970     /* Without scaling, there are no overflow concerns. */
3971     if (likely(scale == 0) && can_use_fpu(status)) {
3972         union_float64 ur;
3973         ur.h = a;
3974         return ur.s;
3975     }
3976 
3977     parts_sint_to_float(&p, a, scale, status);
3978     return float64_round_pack_canonical(&p, status);
3979 }
3980 
3981 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3982 {
3983     return int64_to_float64_scalbn(a, scale, status);
3984 }
3985 
3986 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3987 {
3988     return int64_to_float64_scalbn(a, scale, status);
3989 }
3990 
3991 float64 int64_to_float64(int64_t a, float_status *status)
3992 {
3993     return int64_to_float64_scalbn(a, 0, status);
3994 }
3995 
3996 float64 int32_to_float64(int32_t a, float_status *status)
3997 {
3998     return int64_to_float64_scalbn(a, 0, status);
3999 }
4000 
4001 float64 int16_to_float64(int16_t a, float_status *status)
4002 {
4003     return int64_to_float64_scalbn(a, 0, status);
4004 }
4005 
4006 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
4007 {
4008     FloatParts64 p;
4009 
4010     parts_sint_to_float(&p, a, scale, status);
4011     return bfloat16_round_pack_canonical(&p, status);
4012 }
4013 
4014 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
4015 {
4016     return int64_to_bfloat16_scalbn(a, scale, status);
4017 }
4018 
4019 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
4020 {
4021     return int64_to_bfloat16_scalbn(a, scale, status);
4022 }
4023 
4024 bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
4025 {
4026     return int64_to_bfloat16_scalbn(a, scale, status);
4027 }
4028 
4029 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
4030 {
4031     return int64_to_bfloat16_scalbn(a, 0, status);
4032 }
4033 
4034 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
4035 {
4036     return int64_to_bfloat16_scalbn(a, 0, status);
4037 }
4038 
4039 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
4040 {
4041     return int64_to_bfloat16_scalbn(a, 0, status);
4042 }
4043 
4044 bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
4045 {
4046     return int64_to_bfloat16_scalbn(a, 0, status);
4047 }
4048 
4049 float128 int128_to_float128(Int128 a, float_status *status)
4050 {
4051     FloatParts128 p = { };
4052     int shift;
4053 
4054     if (int128_nz(a)) {
4055         p.cls = float_class_normal;
4056         if (!int128_nonneg(a)) {
4057             p.sign = true;
4058             a = int128_neg(a);
4059         }
4060 
4061         shift = clz64(int128_gethi(a));
4062         if (shift == 64) {
4063             shift += clz64(int128_getlo(a));
4064         }
4065 
4066         p.exp = 127 - shift;
4067         a = int128_lshift(a, shift);
4068 
4069         p.frac_hi = int128_gethi(a);
4070         p.frac_lo = int128_getlo(a);
4071     } else {
4072         p.cls = float_class_zero;
4073     }
4074 
4075     return float128_round_pack_canonical(&p, status);
4076 }
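
/*
 * Worked example for the normalization above (editorial comment): for
 * a == 1 the high limb is zero, so shift == 64 + 63 == 127; the left
 * shift parks the most significant set bit at bit 127 (the implicit-bit
 * position) and p.exp == 127 - 127 == 0, i.e. 1.0 * 2**0.  Inputs with
 * more than 113 significant bits are then rounded (and flagged inexact)
 * by float128_round_pack_canonical(), since float128 carries only a
 * 113-bit significand.
 */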
4077 
4078 float128 int64_to_float128(int64_t a, float_status *status)
4079 {
4080     FloatParts128 p;
4081 
4082     parts_sint_to_float(&p, a, 0, status);
4083     return float128_round_pack_canonical(&p, status);
4084 }
4085 
4086 float128 int32_to_float128(int32_t a, float_status *status)
4087 {
4088     return int64_to_float128(a, status);
4089 }
4090 
4091 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4092 {
4093     FloatParts128 p;
4094 
4095     parts_sint_to_float(&p, a, 0, status);
4096     return floatx80_round_pack_canonical(&p, status);
4097 }
4098 
4099 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4100 {
4101     return int64_to_floatx80(a, status);
4102 }
4103 
4104 /*
4105  * Unsigned Integer to floating-point conversions
4106  */
4107 
4108 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
4109 {
4110     FloatParts64 p;
4111 
4112     parts_uint_to_float(&p, a, scale, status);
4113     return float16_round_pack_canonical(&p, status);
4114 }
4115 
4116 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
4117 {
4118     return uint64_to_float16_scalbn(a, scale, status);
4119 }
4120 
4121 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
4122 {
4123     return uint64_to_float16_scalbn(a, scale, status);
4124 }
4125 
4126 float16 uint64_to_float16(uint64_t a, float_status *status)
4127 {
4128     return uint64_to_float16_scalbn(a, 0, status);
4129 }
4130 
4131 float16 uint32_to_float16(uint32_t a, float_status *status)
4132 {
4133     return uint64_to_float16_scalbn(a, 0, status);
4134 }
4135 
4136 float16 uint16_to_float16(uint16_t a, float_status *status)
4137 {
4138     return uint64_to_float16_scalbn(a, 0, status);
4139 }
4140 
4141 float16 uint8_to_float16(uint8_t a, float_status *status)
4142 {
4143     return uint64_to_float16_scalbn(a, 0, status);
4144 }
4145 
4146 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4147 {
4148     FloatParts64 p;
4149 
4150     /* Without scaling, there are no overflow concerns. */
4151     if (likely(scale == 0) && can_use_fpu(status)) {
4152         union_float32 ur;
4153         ur.h = a;
4154         return ur.s;
4155     }
4156 
4157     parts_uint_to_float(&p, a, scale, status);
4158     return float32_round_pack_canonical(&p, status);
4159 }
4160 
4161 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4162 {
4163     return uint64_to_float32_scalbn(a, scale, status);
4164 }
4165 
4166 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4167 {
4168     return uint64_to_float32_scalbn(a, scale, status);
4169 }
4170 
4171 float32 uint64_to_float32(uint64_t a, float_status *status)
4172 {
4173     return uint64_to_float32_scalbn(a, 0, status);
4174 }
4175 
4176 float32 uint32_to_float32(uint32_t a, float_status *status)
4177 {
4178     return uint64_to_float32_scalbn(a, 0, status);
4179 }
4180 
4181 float32 uint16_to_float32(uint16_t a, float_status *status)
4182 {
4183     return uint64_to_float32_scalbn(a, 0, status);
4184 }
4185 
4186 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4187 {
4188     FloatParts64 p;
4189 
4190     /* Without scaling, there are no overflow concerns. */
4191     if (likely(scale == 0) && can_use_fpu(status)) {
4192         union_float64 ur;
4193         ur.h = a;
4194         return ur.s;
4195     }
4196 
4197     parts_uint_to_float(&p, a, scale, status);
4198     return float64_round_pack_canonical(&p, status);
4199 }
4200 
4201 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4202 {
4203     return uint64_to_float64_scalbn(a, scale, status);
4204 }
4205 
4206 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4207 {
4208     return uint64_to_float64_scalbn(a, scale, status);
4209 }
4210 
4211 float64 uint64_to_float64(uint64_t a, float_status *status)
4212 {
4213     return uint64_to_float64_scalbn(a, 0, status);
4214 }
4215 
4216 float64 uint32_to_float64(uint32_t a, float_status *status)
4217 {
4218     return uint64_to_float64_scalbn(a, 0, status);
4219 }
4220 
4221 float64 uint16_to_float64(uint16_t a, float_status *status)
4222 {
4223     return uint64_to_float64_scalbn(a, 0, status);
4224 }
4225 
4226 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4227 {
4228     FloatParts64 p;
4229 
4230     parts_uint_to_float(&p, a, scale, status);
4231     return bfloat16_round_pack_canonical(&p, status);
4232 }
4233 
4234 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4235 {
4236     return uint64_to_bfloat16_scalbn(a, scale, status);
4237 }
4238 
4239 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4240 {
4241     return uint64_to_bfloat16_scalbn(a, scale, status);
4242 }
4243 
4244 bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
4245 {
4246     return uint64_to_bfloat16_scalbn(a, scale, status);
4247 }
4248 
4249 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4250 {
4251     return uint64_to_bfloat16_scalbn(a, 0, status);
4252 }
4253 
4254 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4255 {
4256     return uint64_to_bfloat16_scalbn(a, 0, status);
4257 }
4258 
4259 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4260 {
4261     return uint64_to_bfloat16_scalbn(a, 0, status);
4262 }
4263 
4264 bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
4265 {
4266     return uint64_to_bfloat16_scalbn(a, 0, status);
4267 }
4268 
4269 float128 uint64_to_float128(uint64_t a, float_status *status)
4270 {
4271     FloatParts128 p;
4272 
4273     parts_uint_to_float(&p, a, 0, status);
4274     return float128_round_pack_canonical(&p, status);
4275 }
4276 
4277 float128 uint128_to_float128(Int128 a, float_status *status)
4278 {
4279     FloatParts128 p = { };
4280     int shift;
4281 
4282     if (int128_nz(a)) {
4283         p.cls = float_class_normal;
4284 
4285         shift = clz64(int128_gethi(a));
4286         if (shift == 64) {
4287             shift += clz64(int128_getlo(a));
4288         }
4289 
4290         p.exp = 127 - shift;
4291         a = int128_lshift(a, shift);
4292 
4293         p.frac_hi = int128_gethi(a);
4294         p.frac_lo = int128_getlo(a);
4295     } else {
4296         p.cls = float_class_zero;
4297     }
4298 
4299     return float128_round_pack_canonical(&p, status);
4300 }
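
/*
 * Editorial note: this is the unsigned twin of int128_to_float128()
 * above -- the same clz64()/int128_lshift() normalization, minus the
 * sign-and-negate step, since every Int128 bit pattern is treated here
 * as a magnitude.
 */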
4301 
4302 /*
4303  * Minimum and maximum
4304  */
4305 
4306 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4307 {
4308     FloatParts64 pa, pb, *pr;
4309 
4310     float16_unpack_canonical(&pa, a, s);
4311     float16_unpack_canonical(&pb, b, s);
4312     pr = parts_minmax(&pa, &pb, s, flags);
4313 
4314     return float16_round_pack_canonical(pr, s);
4315 }
4316 
4317 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4318                                 float_status *s, int flags)
4319 {
4320     FloatParts64 pa, pb, *pr;
4321 
4322     bfloat16_unpack_canonical(&pa, a, s);
4323     bfloat16_unpack_canonical(&pb, b, s);
4324     pr = parts_minmax(&pa, &pb, s, flags);
4325 
4326     return bfloat16_round_pack_canonical(pr, s);
4327 }
4328 
4329 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4330 {
4331     FloatParts64 pa, pb, *pr;
4332 
4333     float32_unpack_canonical(&pa, a, s);
4334     float32_unpack_canonical(&pb, b, s);
4335     pr = parts_minmax(&pa, &pb, s, flags);
4336 
4337     return float32_round_pack_canonical(pr, s);
4338 }
4339 
4340 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4341 {
4342     FloatParts64 pa, pb, *pr;
4343 
4344     float64_unpack_canonical(&pa, a, s);
4345     float64_unpack_canonical(&pb, b, s);
4346     pr = parts_minmax(&pa, &pb, s, flags);
4347 
4348     return float64_round_pack_canonical(pr, s);
4349 }
4350 
4351 static float128 float128_minmax(float128 a, float128 b,
4352                                 float_status *s, int flags)
4353 {
4354     FloatParts128 pa, pb, *pr;
4355 
4356     float128_unpack_canonical(&pa, a, s);
4357     float128_unpack_canonical(&pb, b, s);
4358     pr = parts_minmax(&pa, &pb, s, flags);
4359 
4360     return float128_round_pack_canonical(pr, s);
4361 }
4362 
4363 #define MINMAX_1(type, name, flags) \
4364     type type##_##name(type a, type b, float_status *s) \
4365     { return type##_minmax(a, b, s, flags); }
4366 
4367 #define MINMAX_2(type) \
4368     MINMAX_1(type, max, 0)                                                \
4369     MINMAX_1(type, maxnum, minmax_isnum)                                  \
4370     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4371     MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4372     MINMAX_1(type, min, minmax_ismin)                                     \
4373     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4374     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4375     MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4376 
4377 MINMAX_2(float16)
4378 MINMAX_2(bfloat16)
4379 MINMAX_2(float32)
4380 MINMAX_2(float64)
4381 MINMAX_2(float128)
4382 
4383 #undef MINMAX_1
4384 #undef MINMAX_2
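
/*
 * Expansion example (editorial comment): for each type MINMAX_2 emits
 * eight thin wrappers; e.g. MINMAX_1(float32, minnum, minmax_ismin |
 * minmax_isnum) expands to
 *
 *     float32 float32_minnum(float32 a, float32 b, float_status *s)
 *     { return float32_minmax(a, b, s, minmax_ismin | minmax_isnum); }
 *
 * i.e. the IEEE 754-2008 minNum operation, which prefers a number over
 * a quiet NaN operand.
 */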
4385 
4386 /*
4387  * Floating point compare
4388  */
4389 
4390 static FloatRelation QEMU_FLATTEN
4391 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4392 {
4393     FloatParts64 pa, pb;
4394 
4395     float16_unpack_canonical(&pa, a, s);
4396     float16_unpack_canonical(&pb, b, s);
4397     return parts_compare(&pa, &pb, s, is_quiet);
4398 }
4399 
4400 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4401 {
4402     return float16_do_compare(a, b, s, false);
4403 }
4404 
4405 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4406 {
4407     return float16_do_compare(a, b, s, true);
4408 }
4409 
4410 static FloatRelation QEMU_SOFTFLOAT_ATTR
4411 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4412 {
4413     FloatParts64 pa, pb;
4414 
4415     float32_unpack_canonical(&pa, a, s);
4416     float32_unpack_canonical(&pb, b, s);
4417     return parts_compare(&pa, &pb, s, is_quiet);
4418 }
4419 
4420 static FloatRelation QEMU_FLATTEN
4421 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4422 {
4423     union_float32 ua, ub;
4424 
4425     ua.s = xa;
4426     ub.s = xb;
4427 
4428     if (QEMU_NO_HARDFLOAT) {
4429         goto soft;
4430     }
4431 
4432     if (unlikely(float32_is_denormal(ua.s) || float32_is_denormal(ub.s))) {
4433         /* We may need to set the input_denormal_used flag */
4434         goto soft;
4435     }
4436 
4437     if (isgreaterequal(ua.h, ub.h)) {
4438         if (isgreater(ua.h, ub.h)) {
4439             return float_relation_greater;
4440         }
4441         return float_relation_equal;
4442     }
4443     if (likely(isless(ua.h, ub.h))) {
4444         return float_relation_less;
4445     }
4446     /*
4447      * The only condition remaining is unordered.
4448      * Fall through to set flags.
4449      */
4450  soft:
4451     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4452 }
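
/*
 * Editorial note: isgreaterequal()/isgreater()/isless() are the C99
 * <math.h> quiet comparison macros, so every ordered outcome is
 * decided inline without raising anything on quiet NaNs; the one
 * remaining case, unordered, falls through to the softfloat path,
 * which knows how to raise float_flag_invalid for signaling NaNs and
 * for non-quiet compares.
 */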
4453 
4454 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4455 {
4456     return float32_hs_compare(a, b, s, false);
4457 }
4458 
4459 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4460 {
4461     return float32_hs_compare(a, b, s, true);
4462 }
4463 
4464 static FloatRelation QEMU_SOFTFLOAT_ATTR
4465 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4466 {
4467     FloatParts64 pa, pb;
4468 
4469     float64_unpack_canonical(&pa, a, s);
4470     float64_unpack_canonical(&pb, b, s);
4471     return parts_compare(&pa, &pb, s, is_quiet);
4472 }
4473 
4474 static FloatRelation QEMU_FLATTEN
4475 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4476 {
4477     union_float64 ua, ub;
4478 
4479     ua.s = xa;
4480     ub.s = xb;
4481 
4482     if (QEMU_NO_HARDFLOAT) {
4483         goto soft;
4484     }
4485 
4486     if (unlikely(float64_is_denormal(ua.s) || float64_is_denormal(ub.s))) {
4487         /* We may need to set the input_denormal_used flag */
4488         goto soft;
4489     }
4490 
4491     if (isgreaterequal(ua.h, ub.h)) {
4492         if (isgreater(ua.h, ub.h)) {
4493             return float_relation_greater;
4494         }
4495         return float_relation_equal;
4496     }
4497     if (likely(isless(ua.h, ub.h))) {
4498         return float_relation_less;
4499     }
4500     /*
4501      * The only condition remaining is unordered.
4502      * Fall through to set flags.
4503      */
4504  soft:
4505     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4506 }
4507 
4508 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4509 {
4510     return float64_hs_compare(a, b, s, false);
4511 }
4512 
4513 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4514 {
4515     return float64_hs_compare(a, b, s, true);
4516 }
4517 
4518 static FloatRelation QEMU_FLATTEN
4519 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4520 {
4521     FloatParts64 pa, pb;
4522 
4523     bfloat16_unpack_canonical(&pa, a, s);
4524     bfloat16_unpack_canonical(&pb, b, s);
4525     return parts_compare(&pa, &pb, s, is_quiet);
4526 }
4527 
4528 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4529 {
4530     return bfloat16_do_compare(a, b, s, false);
4531 }
4532 
4533 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4534 {
4535     return bfloat16_do_compare(a, b, s, true);
4536 }
4537 
4538 static FloatRelation QEMU_FLATTEN
4539 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4540 {
4541     FloatParts128 pa, pb;
4542 
4543     float128_unpack_canonical(&pa, a, s);
4544     float128_unpack_canonical(&pb, b, s);
4545     return parts_compare(&pa, &pb, s, is_quiet);
4546 }
4547 
4548 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4549 {
4550     return float128_do_compare(a, b, s, false);
4551 }
4552 
4553 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4554 {
4555     return float128_do_compare(a, b, s, true);
4556 }
4557 
4558 static FloatRelation QEMU_FLATTEN
4559 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4560 {
4561     FloatParts128 pa, pb;
4562 
4563     if (!floatx80_unpack_canonical(&pa, a, s) ||
4564         !floatx80_unpack_canonical(&pb, b, s)) {
4565         return float_relation_unordered;
4566     }
4567     return parts_compare(&pa, &pb, s, is_quiet);
4568 }
4569 
4570 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4571 {
4572     return floatx80_do_compare(a, b, s, false);
4573 }
4574 
4575 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4576 {
4577     return floatx80_do_compare(a, b, s, true);
4578 }
4579 
4580 /*
4581  * Scale by 2**N
4582  */
4583 
4584 float16 float16_scalbn(float16 a, int n, float_status *status)
4585 {
4586     FloatParts64 p;
4587 
4588     float16_unpack_canonical(&p, a, status);
4589     parts_scalbn(&p, n, status);
4590     return float16_round_pack_canonical(&p, status);
4591 }
4592 
4593 float32 float32_scalbn(float32 a, int n, float_status *status)
4594 {
4595     FloatParts64 p;
4596 
4597     float32_unpack_canonical(&p, a, status);
4598     parts_scalbn(&p, n, status);
4599     return float32_round_pack_canonical(&p, status);
4600 }
4601 
4602 float64 float64_scalbn(float64 a, int n, float_status *status)
4603 {
4604     FloatParts64 p;
4605 
4606     float64_unpack_canonical(&p, a, status);
4607     parts_scalbn(&p, n, status);
4608     return float64_round_pack_canonical(&p, status);
4609 }
4610 
4611 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4612 {
4613     FloatParts64 p;
4614 
4615     bfloat16_unpack_canonical(&p, a, status);
4616     parts_scalbn(&p, n, status);
4617     return bfloat16_round_pack_canonical(&p, status);
4618 }
4619 
4620 float128 float128_scalbn(float128 a, int n, float_status *status)
4621 {
4622     FloatParts128 p;
4623 
4624     float128_unpack_canonical(&p, a, status);
4625     parts_scalbn(&p, n, status);
4626     return float128_round_pack_canonical(&p, status);
4627 }
4628 
4629 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4630 {
4631     FloatParts128 p;
4632 
4633     if (!floatx80_unpack_canonical(&p, a, status)) {
4634         return floatx80_default_nan(status);
4635     }
4636     parts_scalbn(&p, n, status);
4637     return floatx80_round_pack_canonical(&p, status);
4638 }
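
/*
 * Usage sketch (hypothetical helper): parts_scalbn() scales by 2**n
 * with full overflow/underflow and rounding handling, so an exact
 * multiply-by-8 (when in range) is simply:
 */
static inline float64 example_float64_mul_pow2_3(float64 a, float_status *s)
{
    /* Hypothetical helper: 2**3 == 8; out-of-range results raise flags. */
    return float64_scalbn(a, 3, s);
}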
4639 
4640 /*
4641  * Square Root
4642  */
4643 
4644 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4645 {
4646     FloatParts64 p;
4647 
4648     float16_unpack_canonical(&p, a, status);
4649     parts_sqrt(&p, status, &float16_params);
4650     return float16_round_pack_canonical(&p, status);
4651 }
4652 
4653 static float32 QEMU_SOFTFLOAT_ATTR
4654 soft_f32_sqrt(float32 a, float_status *status)
4655 {
4656     FloatParts64 p;
4657 
4658     float32_unpack_canonical(&p, a, status);
4659     parts_sqrt(&p, status, &float32_params);
4660     return float32_round_pack_canonical(&p, status);
4661 }
4662 
4663 static float64 QEMU_SOFTFLOAT_ATTR
4664 soft_f64_sqrt(float64 a, float_status *status)
4665 {
4666     FloatParts64 p;
4667 
4668     float64_unpack_canonical(&p, a, status);
4669     parts_sqrt(&p, status, &float64_params);
4670     return float64_round_pack_canonical(&p, status);
4671 }
4672 
4673 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4674 {
4675     union_float32 ua, ur;
4676 
4677     ua.s = xa;
4678     if (unlikely(!can_use_fpu(s))) {
4679         goto soft;
4680     }
4681 
4682     float32_input_flush1(&ua.s, s);
4683     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4684         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4685                        fpclassify(ua.h) == FP_ZERO) ||
4686                      signbit(ua.h))) {
4687             goto soft;
4688         }
4689     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4690                         float32_is_neg(ua.s))) {
4691         goto soft;
4692     }
4693     ur.h = sqrtf(ua.h);
4694     return ur.s;
4695 
4696  soft:
4697     return soft_f32_sqrt(ua.s, s);
4698 }
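
/*
 * Editorial note: the guard above admits only non-negative zeros and
 * normals to the host sqrtf(); negative inputs (which must yield the
 * default NaN plus float_flag_invalid), denormals, NaNs and infinities
 * all take the softfloat path.  QEMU_HARDFLOAT_1F32_USE_FP merely
 * selects between host classification (fpclassify()/signbit()) and the
 * softfloat bit tests -- the admitted set is the same either way.
 */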
4699 
4700 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4701 {
4702     union_float64 ua, ur;
4703 
4704     ua.s = xa;
4705     if (unlikely(!can_use_fpu(s))) {
4706         goto soft;
4707     }
4708 
4709     float64_input_flush1(&ua.s, s);
4710     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4711         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4712                        fpclassify(ua.h) == FP_ZERO) ||
4713                      signbit(ua.h))) {
4714             goto soft;
4715         }
4716     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4717                         float64_is_neg(ua.s))) {
4718         goto soft;
4719     }
4720     ur.h = sqrt(ua.h);
4721     return ur.s;
4722 
4723  soft:
4724     return soft_f64_sqrt(ua.s, s);
4725 }
4726 
4727 float64 float64r32_sqrt(float64 a, float_status *status)
4728 {
4729     FloatParts64 p;
4730 
4731     float64_unpack_canonical(&p, a, status);
4732     parts_sqrt(&p, status, &float64_params);
4733     return float64r32_round_pack_canonical(&p, status);
4734 }
4735 
4736 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4737 {
4738     FloatParts64 p;
4739 
4740     bfloat16_unpack_canonical(&p, a, status);
4741     parts_sqrt(&p, status, &bfloat16_params);
4742     return bfloat16_round_pack_canonical(&p, status);
4743 }
4744 
4745 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4746 {
4747     FloatParts128 p;
4748 
4749     float128_unpack_canonical(&p, a, status);
4750     parts_sqrt(&p, status, &float128_params);
4751     return float128_round_pack_canonical(&p, status);
4752 }
4753 
4754 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4755 {
4756     FloatParts128 p;
4757 
4758     if (!floatx80_unpack_canonical(&p, a, s)) {
4759         return floatx80_default_nan(s);
4760     }
4761     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4762     return floatx80_round_pack_canonical(&p, s);
4763 }
4764 
4765 /*
4766  * log2
4767  */
4768 float32 float32_log2(float32 a, float_status *status)
4769 {
4770     FloatParts64 p;
4771 
4772     float32_unpack_canonical(&p, a, status);
4773     parts_log2(&p, status, &float32_params);
4774     return float32_round_pack_canonical(&p, status);
4775 }
4776 
4777 float64 float64_log2(float64 a, float_status *status)
4778 {
4779     FloatParts64 p;
4780 
4781     float64_unpack_canonical(&p, a, status);
4782     parts_log2(&p, status, &float64_params);
4783     return float64_round_pack_canonical(&p, status);
4784 }
4785 
4786 /*----------------------------------------------------------------------------
4787 | The pattern for a default generated NaN.
4788 *----------------------------------------------------------------------------*/
4789 
4790 float16 float16_default_nan(float_status *status)
4791 {
4792     FloatParts64 p;
4793 
4794     parts_default_nan(&p, status);
4795     p.frac >>= float16_params.frac_shift;
4796     return float16_pack_raw(&p);
4797 }
4798 
4799 float32 float32_default_nan(float_status *status)
4800 {
4801     FloatParts64 p;
4802 
4803     parts_default_nan(&p, status);
4804     p.frac >>= float32_params.frac_shift;
4805     return float32_pack_raw(&p);
4806 }
4807 
4808 float64 float64_default_nan(float_status *status)
4809 {
4810     FloatParts64 p;
4811 
4812     parts_default_nan(&p, status);
4813     p.frac >>= float64_params.frac_shift;
4814     return float64_pack_raw(&p);
4815 }
4816 
4817 float128 float128_default_nan(float_status *status)
4818 {
4819     FloatParts128 p;
4820 
4821     parts_default_nan(&p, status);
4822     frac_shr(&p, float128_params.frac_shift);
4823     return float128_pack_raw(&p);
4824 }
4825 
4826 bfloat16 bfloat16_default_nan(float_status *status)
4827 {
4828     FloatParts64 p;
4829 
4830     parts_default_nan(&p, status);
4831     p.frac >>= bfloat16_params.frac_shift;
4832     return bfloat16_pack_raw(&p);
4833 }
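
/*
 * Editorial note: parts_default_nan() builds the NaN at the canonical
 * decomposed fraction position, so each wrapper shifts right by the
 * format's frac_shift to move the payload back into its own fraction
 * field before packing.  On a target whose default NaN is positive
 * with only the topmost fraction bit set, float32_default_nan() thus
 * returns the familiar 0x7fc00000 pattern.
 */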
4834 
4835 /*----------------------------------------------------------------------------
4836 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4837 *----------------------------------------------------------------------------*/
4838 
4839 float16 float16_silence_nan(float16 a, float_status *status)
4840 {
4841     FloatParts64 p;
4842 
4843     float16_unpack_raw(&p, a);
4844     p.frac <<= float16_params.frac_shift;
4845     parts_silence_nan(&p, status);
4846     p.frac >>= float16_params.frac_shift;
4847     return float16_pack_raw(&p);
4848 }
4849 
4850 float32 float32_silence_nan(float32 a, float_status *status)
4851 {
4852     FloatParts64 p;
4853 
4854     float32_unpack_raw(&p, a);
4855     p.frac <<= float32_params.frac_shift;
4856     parts_silence_nan(&p, status);
4857     p.frac >>= float32_params.frac_shift;
4858     return float32_pack_raw(&p);
4859 }
4860 
4861 float64 float64_silence_nan(float64 a, float_status *status)
4862 {
4863     FloatParts64 p;
4864 
4865     float64_unpack_raw(&p, a);
4866     p.frac <<= float64_params.frac_shift;
4867     parts_silence_nan(&p, status);
4868     p.frac >>= float64_params.frac_shift;
4869     return float64_pack_raw(&p);
4870 }
4871 
4872 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4873 {
4874     FloatParts64 p;
4875 
4876     bfloat16_unpack_raw(&p, a);
4877     p.frac <<= bfloat16_params.frac_shift;
4878     parts_silence_nan(&p, status);
4879     p.frac >>= bfloat16_params.frac_shift;
4880     return bfloat16_pack_raw(&p);
4881 }
4882 
4883 float128 float128_silence_nan(float128 a, float_status *status)
4884 {
4885     FloatParts128 p;
4886 
4887     float128_unpack_raw(&p, a);
4888     frac_shl(&p, float128_params.frac_shift);
4889     parts_silence_nan(&p, status);
4890     frac_shr(&p, float128_params.frac_shift);
4891     return float128_pack_raw(&p);
4892 }
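
/*
 * Editorial note: the shift-up/shift-down pairing mirrors the
 * default-NaN wrappers above -- parts_silence_nan() expects the
 * fraction at the decomposed position so it can manipulate the quiet
 * bit at one well-known place for every format, hence the raw fraction
 * is shifted left by frac_shift first and back right afterwards.
 */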
4893 
4894 /*----------------------------------------------------------------------------
4895 | If `a' is denormal and we are in flush-to-zero mode then set the
4896 | input-denormal exception and return zero. Otherwise just return the value.
4897 *----------------------------------------------------------------------------*/
4898 
4899 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4900 {
4901     if (p.exp == 0 && p.frac != 0) {
4902         float_raise(float_flag_input_denormal_flushed, status);
4903         return true;
4904     }
4905 
4906     return false;
4907 }
4908 
4909 float16 float16_squash_input_denormal(float16 a, float_status *status)
4910 {
4911     if (status->flush_inputs_to_zero) {
4912         FloatParts64 p;
4913 
4914         float16_unpack_raw(&p, a);
4915         if (parts_squash_denormal(p, status)) {
4916             return float16_set_sign(float16_zero, p.sign);
4917         }
4918     }
4919     return a;
4920 }
4921 
4922 float32 float32_squash_input_denormal(float32 a, float_status *status)
4923 {
4924     if (status->flush_inputs_to_zero) {
4925         FloatParts64 p;
4926 
4927         float32_unpack_raw(&p, a);
4928         if (parts_squash_denormal(p, status)) {
4929             return float32_set_sign(float32_zero, p.sign);
4930         }
4931     }
4932     return a;
4933 }
4934 
4935 float64 float64_squash_input_denormal(float64 a, float_status *status)
4936 {
4937     if (status->flush_inputs_to_zero) {
4938         FloatParts64 p;
4939 
4940         float64_unpack_raw(&p, a);
4941         if (parts_squash_denormal(p, status)) {
4942             return float64_set_sign(float64_zero, p.sign);
4943         }
4944     }
4945     return a;
4946 }
4947 
4948 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4949 {
4950     if (status->flush_inputs_to_zero) {
4951         FloatParts64 p;
4952 
4953         bfloat16_unpack_raw(&p, a);
4954         if (parts_squash_denormal(p, status)) {
4955             return bfloat16_set_sign(bfloat16_zero, p.sign);
4956         }
4957     }
4958     return a;
4959 }
4960 
4961 /*----------------------------------------------------------------------------
4962 | Normalizes the subnormal extended double-precision floating-point value
4963 | represented by the denormalized significand `aSig'.  The normalized exponent
4964 | and significand are stored at the locations pointed to by `zExpPtr' and
4965 | `zSigPtr', respectively.
4966 *----------------------------------------------------------------------------*/
4967 
4968 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4969                                 uint64_t *zSigPtr)
4970 {
4971     int8_t shiftCount;
4972 
4973     shiftCount = clz64(aSig);
4974     *zSigPtr = aSig<<shiftCount;
4975     *zExpPtr = 1 - shiftCount;
4976 }
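
/*
 * Worked example (editorial comment): for aSig == 1, clz64() returns
 * 63, so *zSigPtr becomes 0x8000000000000000 (integer bit set) and
 * *zExpPtr becomes 1 - 63 == -62; the exponent drop exactly
 * compensates the left shift, so the represented value is unchanged.
 */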
4977 
4978 /*----------------------------------------------------------------------------
4979 | Takes two extended double-precision floating-point values `a' and `b', one
4980 | of which is a NaN, and returns the appropriate NaN result.  If either `a' or
4981 | `b' is a signaling NaN, the invalid exception is raised.
4982 *----------------------------------------------------------------------------*/
4983 
4984 floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status)
4985 {
4986     FloatParts128 pa, pb, *pr;
4987 
4988     if (!floatx80_unpack_canonical(&pa, a, status) ||
4989         !floatx80_unpack_canonical(&pb, b, status)) {
4990         return floatx80_default_nan(status);
4991     }
4992 
4993     pr = parts_pick_nan(&pa, &pb, status);
4994     return floatx80_round_pack_canonical(pr, status);
4995 }
4996 
4997 /*----------------------------------------------------------------------------
4998 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4999 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
5000 | and returns the proper extended double-precision floating-point value
5001 | corresponding to the abstract input.  Ordinarily, the abstract value is
5002 | rounded and packed into the extended double-precision format, with the
5003 | inexact exception raised if the abstract input cannot be represented
5004 | exactly.  However, if the abstract value is too large, the overflow and
5005 | inexact exceptions are raised and an infinity or maximal finite value is
5006 | returned.  If the abstract value is too small, the input value is rounded to
5007 | a subnormal number, and the underflow and inexact exceptions are raised if
5008 | the abstract input cannot be represented exactly as a subnormal extended
5009 | double-precision floating-point number.
5010 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
5011 | the result is rounded to the same number of bits as single or double
5012 | precision, respectively.  Otherwise, the result is rounded to the full
5013 | precision of the extended double-precision format.
5014 |     The input significand must be normalized or smaller.  If the input
5015 | significand is not normalized, `zExp' must be 0; in that case, the result
5016 | returned is a subnormal number, and it must not require rounding.  The
5017 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
5018 | Floating-Point Arithmetic.
5019 *----------------------------------------------------------------------------*/
5020 
5021 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
5022                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
5023                               float_status *status)
5024 {
5025     FloatRoundMode roundingMode;
5026     bool roundNearestEven, increment, isTiny;
5027     int64_t roundIncrement, roundMask, roundBits;
5028 
5029     roundingMode = status->float_rounding_mode;
5030     roundNearestEven = ( roundingMode == float_round_nearest_even );
5031     switch (roundingPrecision) {
5032     case floatx80_precision_x:
5033         goto precision80;
5034     case floatx80_precision_d:
5035         roundIncrement = UINT64_C(0x0000000000000400);
5036         roundMask = UINT64_C(0x00000000000007FF);
5037         break;
5038     case floatx80_precision_s:
5039         roundIncrement = UINT64_C(0x0000008000000000);
5040         roundMask = UINT64_C(0x000000FFFFFFFFFF);
5041         break;
5042     default:
5043         g_assert_not_reached();
5044     }
5045     zSig0 |= ( zSig1 != 0 );
5046     switch (roundingMode) {
5047     case float_round_nearest_even:
5048     case float_round_ties_away:
5049         break;
5050     case float_round_to_zero:
5051         roundIncrement = 0;
5052         break;
5053     case float_round_up:
5054         roundIncrement = zSign ? 0 : roundMask;
5055         break;
5056     case float_round_down:
5057         roundIncrement = zSign ? roundMask : 0;
5058         break;
5059     default:
5060         abort();
5061     }
5062     roundBits = zSig0 & roundMask;
5063     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
5064         if (    ( 0x7FFE < zExp )
5065              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
5066            ) {
5067             goto overflow;
5068         }
5069         if ( zExp <= 0 ) {
5070             if (status->flush_to_zero) {
5071                 float_raise(float_flag_output_denormal_flushed, status);
5072                 return packFloatx80(zSign, 0, 0);
5073             }
5074             isTiny = status->tininess_before_rounding
5075                   || (zExp < 0 )
5076                   || (zSig0 <= zSig0 + roundIncrement);
5077             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
5078             zExp = 0;
5079             roundBits = zSig0 & roundMask;
5080             if (isTiny && roundBits) {
5081                 float_raise(float_flag_underflow, status);
5082             }
5083             if (roundBits) {
5084                 float_raise(float_flag_inexact, status);
5085             }
5086             zSig0 += roundIncrement;
5087             if ( (int64_t) zSig0 < 0 ) zExp = 1;
5088             roundIncrement = roundMask + 1;
5089             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5090                 roundMask |= roundIncrement;
5091             }
5092             zSig0 &= ~ roundMask;
5093             return packFloatx80( zSign, zExp, zSig0 );
5094         }
5095     }
5096     if (roundBits) {
5097         float_raise(float_flag_inexact, status);
5098     }
5099     zSig0 += roundIncrement;
5100     if ( zSig0 < roundIncrement ) {
5101         ++zExp;
5102         zSig0 = UINT64_C(0x8000000000000000);
5103     }
5104     roundIncrement = roundMask + 1;
5105     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5106         roundMask |= roundIncrement;
5107     }
5108     zSig0 &= ~ roundMask;
5109     if ( zSig0 == 0 ) zExp = 0;
5110     return packFloatx80( zSign, zExp, zSig0 );
5111  precision80:
5112     switch (roundingMode) {
5113     case float_round_nearest_even:
5114     case float_round_ties_away:
5115         increment = ((int64_t)zSig1 < 0);
5116         break;
5117     case float_round_to_zero:
5118         increment = 0;
5119         break;
5120     case float_round_up:
5121         increment = !zSign && zSig1;
5122         break;
5123     case float_round_down:
5124         increment = zSign && zSig1;
5125         break;
5126     default:
5127         abort();
5128     }
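    /* As above: true iff zExp <= 0 or zExp >= 0x7FFE */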
5129     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
5130         if (    ( 0x7FFE < zExp )
5131              || (    ( zExp == 0x7FFE )
5132                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
5133                   && increment
5134                 )
5135            ) {
5136             roundMask = 0;
5137  overflow:
5138             float_raise(float_flag_overflow | float_flag_inexact, status);
5139             if (    ( roundingMode == float_round_to_zero )
5140                  || ( zSign && ( roundingMode == float_round_up ) )
5141                  || ( ! zSign && ( roundingMode == float_round_down ) )
5142                ) {
5143                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
5144             }
5145             return floatx80_default_inf(zSign, status);
5146         }
5147         if ( zExp <= 0 ) {
5148             isTiny = status->tininess_before_rounding
5149                   || (zExp < 0)
5150                   || !increment
5151                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
5152             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
5153             zExp = 0;
5154             if (isTiny && zSig1) {
5155                 float_raise(float_flag_underflow, status);
5156             }
5157             if (zSig1) {
5158                 float_raise(float_flag_inexact, status);
5159             }
5160             switch (roundingMode) {
5161             case float_round_nearest_even:
5162             case float_round_ties_away:
5163                 increment = ((int64_t)zSig1 < 0);
5164                 break;
5165             case float_round_to_zero:
5166                 increment = 0;
5167                 break;
5168             case float_round_up:
5169                 increment = !zSign && zSig1;
5170                 break;
5171             case float_round_down:
5172                 increment = zSign && zSig1;
5173                 break;
5174             default:
5175                 abort();
5176             }
5177             if ( increment ) {
5178                 ++zSig0;
5179                 if (!(zSig1 << 1) && roundNearestEven) {
5180                     zSig0 &= ~1;
5181                 }
5182                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
5183             }
5184             return packFloatx80( zSign, zExp, zSig0 );
5185         }
5186     }
5187     if (zSig1) {
5188         float_raise(float_flag_inexact, status);
5189     }
5190     if ( increment ) {
5191         ++zSig0;
5192         if ( zSig0 == 0 ) {
5193             ++zExp;
5194             zSig0 = UINT64_C(0x8000000000000000);
5195         }
5196         else {
5197             if (!(zSig1 << 1) && roundNearestEven) {
5198                 zSig0 &= ~1;
5199             }
5200         }
5201     }
5202     else {
5203         if ( zSig0 == 0 ) zExp = 0;
5204     }
5205     return packFloatx80( zSign, zExp, zSig0 );
5206 
5207 }
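
/*----------------------------------------------------------------------------
| For illustration only: ignoring exponent carry-out and the non-nearest
| rounding modes, the reduced-precision path above behaves like the
| hypothetical helper below, where `roundMask' covers the bits to be dropped
| and half a unit in the last place is `(roundMask + 1) >> 1'.
*----------------------------------------------------------------------------*/

static inline uint64_t round_sig64_nearest_even(uint64_t sig, uint64_t roundMask)
{
    uint64_t half = (roundMask + 1) >> 1;  /* half a ULP at the target width */
    uint64_t roundBits = sig & roundMask;  /* the bits being rounded away */

    sig += half;                           /* may carry into the kept bits */
    if (roundBits == half) {
        roundMask |= roundMask + 1;        /* exact tie: clear the kept LSB too */
    }
    return sig & ~roundMask;
}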
5208 
5209 /*----------------------------------------------------------------------------
5210 | Takes an abstract floating-point value having sign `zSign', exponent
5211 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5212 | and returns the proper extended double-precision floating-point value
5213 | corresponding to the abstract input.  This routine is just like
5214 | `roundAndPackFloatx80' except that the input significand does not have to be
5215 | normalized.
5216 *----------------------------------------------------------------------------*/
5217 
5218 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5219                                        bool zSign, int32_t zExp,
5220                                        uint64_t zSig0, uint64_t zSig1,
5221                                        float_status *status)
5222 {
5223     int8_t shiftCount;
5224 
5225     if ( zSig0 == 0 ) {
5226         zSig0 = zSig1;
5227         zSig1 = 0;
5228         zExp -= 64;
5229     }
5230     shiftCount = clz64(zSig0);
5231     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5232     zExp -= shiftCount;
5233     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5234                                 zSig0, zSig1, status);
5235 
5236 }
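
/*----------------------------------------------------------------------------
| The same normalization idea on a single 64-bit significand, as a
| hypothetical sketch: shift the leading set bit up to bit 63 and adjust the
| exponent to compensate.  The routine above does this across 128 bits,
| first swapping `zSig1' into the high word when `zSig0' is entirely zero.
*----------------------------------------------------------------------------*/

static inline void normalize_sig64(uint64_t *zSig, int32_t *zExp)
{
    int8_t shiftCount = clz64(*zSig);  /* assumes *zSig != 0 */

    *zSig <<= shiftCount;
    *zExp -= shiftCount;
}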
5237 
5238 /*----------------------------------------------------------------------------
5239 | Returns the binary exponential of the single-precision floating-point value
5240 | `a'. The operation is performed according to the IEC/IEEE Standard for
5241 | Binary Floating-Point Arithmetic.
5242 |
5243 | Uses the following identities:
5244 |
5245 | 1. -------------------------------------------------------------------------
5246 |      x    x*ln(2)
5247 |     2  = e
5248 |
5249 | 2. -------------------------------------------------------------------------
5250 |                      2     3     4     5           n
5251 |      x        x     x     x     x     x           x
5252 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5253 |               1!    2!    3!    4!    5!          n!
5254 *----------------------------------------------------------------------------*/
5255 
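/* Maclaurin coefficients 1/n! for n = 1..15, stored as float64 bit patterns */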
5256 static const float64 float32_exp2_coefficients[15] =
5257 {
5258     const_float64( 0x3ff0000000000000ll ), /*  1 */
5259     const_float64( 0x3fe0000000000000ll ), /*  2 */
5260     const_float64( 0x3fc5555555555555ll ), /*  3 */
5261     const_float64( 0x3fa5555555555555ll ), /*  4 */
5262     const_float64( 0x3f81111111111111ll ), /*  5 */
5263     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5264     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5265     const_float64( 0x3efa01a01a01a01all ), /*  8 */
5266     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5267     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5268     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5269     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5270     const_float64( 0x3de6124613a86d09ll ), /* 13 */
5271     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5272     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5273 };
5274 
5275 float32 float32_exp2(float32 a, float_status *status)
5276 {
5277     FloatParts64 xp, xnp, tp, rp;
5278     int i;
5279 
5280     float32_unpack_canonical(&xp, a, status);
5281     if (unlikely(xp.cls != float_class_normal)) {
5282         switch (xp.cls) {
5283         case float_class_denormal:
5284             break;
5285         case float_class_snan:
5286         case float_class_qnan:
5287             parts_return_nan(&xp, status);
5288             return float32_round_pack_canonical(&xp, status);
5289         case float_class_inf:
5290             return xp.sign ? float32_zero : a;
5291         case float_class_zero:
5292             return float32_one;
5293         default:
5294             g_assert_not_reached();
5295         }
5296     }
5297 
5298     float_raise(float_flag_inexact, status);
5299 
5300     float64_unpack_canonical(&tp, float64_ln2, status);
5301     xp = *parts_mul(&xp, &tp, status);
5302     xnp = xp;
5303 
5304     float64_unpack_canonical(&rp, float64_one, status);
5305     for (i = 0 ; i < 15 ; i++) {
5306 
5307         float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
5308         rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status);
5309         xnp = *parts_mul(&xnp, &xp, status);
5310     }
5311 
5312     return float32_round_pack_canonical(&rp, status);
5313 }
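
/*----------------------------------------------------------------------------
| The algorithm above restated with host doubles, purely as an illustrative
| sketch (hypothetical helper; the real code stays in FloatParts64 so that
| rounding behaviour and exception flags match the guest exactly):
*----------------------------------------------------------------------------*/

static inline double float32_exp2_sketch(double x)
{
    double t = x * 0.6931471805599453;  /* identity 1: 2**x = e**(x * ln 2) */
    double tn = t;                      /* running power t**n */
    double fact = 1.0;                  /* running factorial n! */
    double r = 1.0;                     /* term n = 0 of identity 2 */
    int n;

    for (n = 1; n <= 15; n++) {
        fact *= n;
        r += tn / fact;                 /* identity 2: add t**n / n! */
        tn *= t;
    }
    return r;
}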
5314 
5315 /*----------------------------------------------------------------------------
5316 | Rounds the extended double-precision floating-point value `a'
5317 | to the precision provided by floatx80_rounding_precision and returns the
5318 | result as an extended double-precision floating-point value.
5319 | The operation is performed according to the IEC/IEEE Standard for Binary
5320 | Floating-Point Arithmetic.
5321 *----------------------------------------------------------------------------*/
5322 
5323 floatx80 floatx80_round(floatx80 a, float_status *status)
5324 {
5325     FloatParts128 p;
5326 
5327     if (!floatx80_unpack_canonical(&p, a, status)) {
5328         return floatx80_default_nan(status);
5329     }
5330     return floatx80_round_pack_canonical(&p, status);
5331 }
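
/*----------------------------------------------------------------------------
| Usage sketch (hypothetical caller, assuming the get/set rounding-precision
| accessors from softfloat-helpers.h): round an extended value as if the
| guest FPU control word selected 53-bit (double) precision, then restore
| the previous setting.
*----------------------------------------------------------------------------*/

static inline floatx80 floatx80_round_to_double(floatx80 a, float_status *status)
{
    FloatX80RoundPrec old = get_floatx80_rounding_precision(status);
    floatx80 r;

    set_floatx80_rounding_precision(floatx80_precision_d, status);
    r = floatx80_round(a, status);
    set_floatx80_rounding_precision(old, status);
    return r;
}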
5332 
5333 static void __attribute__((constructor)) softfloat_init(void)
5334 {
5335     union_float64 ua, ub, uc, ur;
5336 
5337     if (QEMU_NO_HARDFLOAT) {
5338         return;
5339     }
5340     /*
5341      * Test that the host's FMA is not obviously broken. For example,
5342      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5343      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5344      */
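    /*
     * The operands below are chosen so that ua * ub is slightly more than
     * half an ulp of uc: a correctly rounded fma() must round the sum up
     * to uc plus one ulp (0x0020000000000001), whereas an unfused
     * multiply-then-add, or an fma() that double-rounds through the
     * subnormal range, rounds the product first and produces an exact tie
     * that rounds back down to uc.
     */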
5345     ua.s = 0x0020000000000001ULL;
5346     ub.s = 0x3ca0000000000000ULL;
5347     uc.s = 0x0020000000000000ULL;
5348     ur.h = fma(ua.h, ub.h, uc.h);
5349     if (ur.s != 0x0020000000000001ULL) {
5350         force_soft_fma = true;
5351     }
5352 }
5353