/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the SoftFloat-2a license unless specifically
 * indicated otherwise.
 */

/*
===============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

/* softfloat (and in particular the code in softfloat-specialize.h) is
 * target-dependent and needs the TARGET_* macros.
 */
#include "qemu/osdep.h"
#include <math.h>
#include "qemu/bitops.h"
#include "fpu/softfloat.h"

/* We only need stdlib for abort() */

/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations.  (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
#include "fpu/softfloat-macros.h"

/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is neither fast nor pleasant to work
 * with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is < the minimum normal.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK

#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
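
/*
 * For reference, the GEN_INPUT_FLUSH1(float32_input_flush1, float32)
 * instantiation above expands (modulo whitespace) to:
 *
 *     static inline void float32_input_flush1(float32 *a, float_status *s)
 *     {
 *         if (likely(!s->flush_inputs_to_zero)) {
 *             return;
 *         }
 *         float32_input_flush__nocheck(a, s);
 *     }
 */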

/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64, however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif

static inline bool can_use_fpu(const float_status *s)
{
    if (QEMU_NO_HARDFLOAT) {
        return false;
    }
    return likely(s->float_exception_flags & float_flag_inexact &&
                  s->float_rounding_mode == float_round_nearest_even);
}

/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);

/* 2-input is-zero-or-normal */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}

static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}

/* 3-input is-zero-or-normal */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}

static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}

static inline bool f32_is_inf(union_float32 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float32_is_infinity(a.s);
}

static inline bool f64_is_inf(union_float64 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float64_is_infinity(a.s);
}

static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
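
/*
 * A minimal sketch of how a concrete two-operand operation can be built
 * on top of float64_gen2.  The names hard_f64_add, soft_f64_add and
 * f64_addsubmul_post are illustrative stand-ins for helpers defined
 * elsewhere in this file, not part of the code above:
 *
 *     static double hard_f64_add(double a, double b)
 *     {
 *         return a + b;
 *     }
 *
 *     float64 float64_add(float64 a, float64 b, float_status *s)
 *     {
 *         return float64_gen2(a, b, s, hard_f64_add, soft_f64_add,
 *                             f64_is_zon2, f64_addsubmul_post);
 *     }
 */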

/*
 * Classify a floating point number. Everything at or above float_class_qnan
 * is a NaN, so cls >= float_class_qnan tests for any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
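
/*
 * The class masks reduce multi-operand classification to a couple of ORs
 * and compares.  For example, summarizing two operands and testing for
 * the inf * 0 case of a fused multiply-add looks like this (ab_mask
 * mirrors the mask arguments taken by parts_pick_nan_muladd below):
 *
 *     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 *     if (unlikely(ab_mask == float_cmask_infzero)) {
 *         ... one operand is an infinity and the other is a zero ...
 *     }
 */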

/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 8,
};
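
/*
 * For instance, the IEEE 754-2008 minNum() operation corresponds to the
 * flags (minmax_ismin | minmax_isnum), and maxNumMag() to
 * (minmax_isnum | minmax_ismag), passed to parts_minmax declared below.
 */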

/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}

/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
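
/*
 * Consequently the fraction of a canonicalized normal number always has
 * its most significant bit set: e.g. the value 1.0 decomposes to
 * exp = 0 and frac = DECOMPOSED_IMPLICIT_BIT = 0x8000000000000000.
 */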

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction:
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 *   m68k_denormal: explicit integer bit for extended precision may be 1
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_re_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    bool m68k_denormal;
    uint64_t round_mask;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_re_bias    = (1 << (E - 1)) + (1 << (E - 2)),  \
    .exp_max        = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1
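
/*
 * Worked example: FLOAT_PARAMS(8, 23), used for float32 below, evaluates
 * to exp_size = 8, exp_bias = 127, exp_re_bias = 192, exp_max = 255,
 * frac_size = 23, frac_shift = (-23 - 1) & 63 = 40 and
 * round_mask = (1ull << 40) - 1.  Shifting the 23-bit fraction left by
 * 40 places its msb at bit 62, directly below DECOMPOSED_IMPLICIT_BIT.
 */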

static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

#define FLOATX80_PARAMS(R)              \
    FLOAT_PARAMS_(15),                  \
    .frac_size = R == 64 ? 63 : R,      \
    .frac_shift = 0,                    \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = {
        FLOATX80_PARAMS(64),
#ifdef TARGET_M68K
        .m68k_denormal = true,
#endif
    },
};

/* Unpack a float to parts, but do not canonicalize.  */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}

static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}
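
/*
 * For example, unpacking the float32 bit pattern 0x3fc00000 (1.5f)
 * yields sign = 0, exp = 0x7f and frac = 0x400000: the exponent is
 * still biased and the implicit bit still absent until
 * parts_canonicalize (declared below) is applied.
 */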

static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
{
    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract32(f.high, 15, 1),
        .exp = extract32(f.high, 0, 15),
        .frac_hi = f.low
    };
}

static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}

/* Pack a float from parts, but do not canonicalize.  */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}

static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}

static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}

/*----------------------------------------------------------------------------
| Functions and definitions to determine:  (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output.  These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"

#define PARTS_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME, \
                  FloatParts256 *: parts256_##NAME)
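
/*
 * These _Generic selections dispatch on the pointer type at compile
 * time, so one spelling covers every width.  With FloatParts64 p, for
 * example, parts_default_nan(&p, status) below resolves to
 * parts64_default_nan(&p, status), while a FloatParts128 * argument
 * selects parts128_default_nan.
 */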

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
                                           FloatParts64 *c, int scale,
                                           int flags, float_status *s);
static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
                                             FloatParts128 *c, int scale,
                                             int flags, float_status *s);

#define parts_muladd_scalbn(A, B, C, Z, Y, S) \
    PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
                                    uint64_t *mod_quot, float_status *s);
static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
                                      uint64_t *mod_quot, float_status *s);

#define parts_modrem(A, B, Q, S) \
    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)

static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
                                            FloatRoundMode rmode,
                                            int bitsm1, float_status *s);
static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
                                             FloatRoundMode rmode,
                                             int bitsm1, float_status *s);

#define parts_float_to_sint_modulo(P, R, M, S) \
    PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
                                     float_status *s, bool q);
static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)

static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_log2(A, S, F) \
    PARTS_GENERIC_64_128(log2, A)(A, S, F)

/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME, \
                  FloatParts256 *: frac256_##NAME)

static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)

static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)

static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}

static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)

static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
{
    return (a->frac == b->frac ? float_relation_equal
            : a->frac < b->frac ? float_relation_less
            : float_relation_greater);
}

static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return float_relation_equal;
        }
    }
    return ta < tb ? float_relation_less : float_relation_greater;
}

#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)

static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)

static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}
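
/*
 * Worked example: 1.0 / 1.5 arrives here as a->frac = 0x8000000000000000
 * and b->frac = 0xc000000000000000.  Since a->frac < b->frac, the
 * widened dividend is a->frac:0 and the quotient is 0xaaaaaaaaaaaaaaaa
 * (4/3 with the binary point at bit 63); the non-zero remainder jams
 * the lsb to give 0xaaaaaaaaaaaaaaab, and the true return value
 * decrements the exponent so that the result represents 2/3.
 */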

static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)

static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}

static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}

#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)

static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)

static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)

static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}

static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}

static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)

static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac;
    a1 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        a0 >>= 1;
        exp_diff = 0;
    }

    b0 = b->frac;
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac = a0 | (a1 != 0);
}

static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    a->frac_lo = a1 | (a2 != 0);
}

#define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)

static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)

static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)

static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}
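
/*
 * Shift-right-jamming in action: frac64_shrjam with frac =
 * 0x8000000000000001 and c = 4 discards a set bit, so the result is
 * (0x8000000000000001 >> 4) | 1 = 0x0800000000000001; the jammed lsb
 * records that the value is inexact for later rounding.
 */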

static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}

static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)

static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)

static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)

static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)

1593 /*
1594  * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
1595  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1596  * and thus MIT licensed.
1597  */
1598 static const uint16_t rsqrt_tab[128] = {
1599     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1600     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1601     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1602     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1603     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1604     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1605     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1606     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1607     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1608     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1609     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1610     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1611     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1612     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1613     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1614     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1615 };
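/*
 * Indexing sketch (an assumption based on musl's sqrt, not a description
 * of a specific caller here): the low bit of the exponent selects the
 * table half and the top six fraction bits select the entry, roughly
 *
 *     int i = ((exp & 1) << 6) | ((frac_hi >> 57) & 0x3f);
 *     uint32_t r = rsqrt_tab[i] << 16;    initial 1/sqrt estimate
 *
 * which is then refined by Newton-Raphson iterations.
 */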
1616 
1617 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1618 #define FloatPartsN    glue(FloatParts,N)
1619 #define FloatPartsW    glue(FloatParts,W)
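/*
 * With N defined as 64 below, partsN(addsub) expands to parts64_addsub
 * and FloatPartsN to FloatParts64; re-including the same templates with
 * N = 128 and N = 256 stamps out the wider variants.
 */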
1620 
1621 #define N 64
1622 #define W 128
1623 
1624 #include "softfloat-parts-addsub.c.inc"
1625 #include "softfloat-parts.c.inc"
1626 
1627 #undef  N
1628 #undef  W
1629 #define N 128
1630 #define W 256
1631 
1632 #include "softfloat-parts-addsub.c.inc"
1633 #include "softfloat-parts.c.inc"
1634 
1635 #undef  N
1636 #undef  W
1637 #define N            256
1638 
1639 #include "softfloat-parts-addsub.c.inc"
1640 
1641 #undef  N
1642 #undef  W
1643 #undef  partsN
1644 #undef  FloatPartsN
1645 #undef  FloatPartsW
1646 
1647 /*
1648  * Pack/unpack routines with a specific FloatFmt.
1649  */
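/*
 * Every operation below follows the same shape: unpack the operands into
 * canonical FloatPartsN form, operate on the parts, then round and
 * repack.  Sketch for a unary float32 operation:
 *
 *     FloatParts64 p;
 *     float32_unpack_canonical(&p, in, status);
 *     ...operate on p...
 *     out = float32_round_pack_canonical(&p, status);
 */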
1650 
1651 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1652                                       float_status *s, const FloatFmt *params)
1653 {
1654     float16_unpack_raw(p, f);
1655     parts_canonicalize(p, s, params);
1656 }
1657 
1658 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1659                                      float_status *s)
1660 {
1661     float16a_unpack_canonical(p, f, s, &float16_params);
1662 }
1663 
1664 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1665                                       float_status *s)
1666 {
1667     bfloat16_unpack_raw(p, f);
1668     parts_canonicalize(p, s, &bfloat16_params);
1669 }
1670 
1671 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1672                                              float_status *s,
1673                                              const FloatFmt *params)
1674 {
1675     parts_uncanon(p, s, params);
1676     return float16_pack_raw(p);
1677 }
1678 
1679 static float16 float16_round_pack_canonical(FloatParts64 *p,
1680                                             float_status *s)
1681 {
1682     return float16a_round_pack_canonical(p, s, &float16_params);
1683 }
1684 
1685 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1686                                               float_status *s)
1687 {
1688     parts_uncanon(p, s, &bfloat16_params);
1689     return bfloat16_pack_raw(p);
1690 }
1691 
1692 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1693                                      float_status *s)
1694 {
1695     float32_unpack_raw(p, f);
1696     parts_canonicalize(p, s, &float32_params);
1697 }
1698 
1699 static float32 float32_round_pack_canonical(FloatParts64 *p,
1700                                             float_status *s)
1701 {
1702     parts_uncanon(p, s, &float32_params);
1703     return float32_pack_raw(p);
1704 }
1705 
1706 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1707                                      float_status *s)
1708 {
1709     float64_unpack_raw(p, f);
1710     parts_canonicalize(p, s, &float64_params);
1711 }
1712 
1713 static float64 float64_round_pack_canonical(FloatParts64 *p,
1714                                             float_status *s)
1715 {
1716     parts_uncanon(p, s, &float64_params);
1717     return float64_pack_raw(p);
1718 }
1719 
1720 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1721                                                float_status *s)
1722 {
1723     parts_uncanon(p, s, &float32_params);
1724 
1725     /*
1726      * In parts_uncanon, we placed the fraction for float32 at the lsb.
1727      * We need to adjust the fraction higher so that the least N bits are
1728      * zero, and the fraction is adjacent to the float64 implicit bit.
1729      */
1730     switch (p->cls) {
1731     case float_class_normal:
1732         if (unlikely(p->exp == 0)) {
1733             /*
1734              * The result is denormal for float32, but can be represented
1735              * in normalized form for float64.  Adjust, per canonicalize.
1736              */
1737             int shift = frac_normalize(p);
1738             p->exp = (float32_params.frac_shift -
1739                       float32_params.exp_bias - shift + 1 +
1740                       float64_params.exp_bias);
1741             frac_shr(p, float64_params.frac_shift);
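            /*
             * Worked example: the largest float32 denormal normalizes
             * with shift = 41, so exp = 40 - 127 - 41 + 1 + 1023 = 896,
             * the float64 biased exponent for 2**-127 (assuming the
             * usual float32 frac_shift of 40 and biases of 127/1023).
             */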
1742         } else {
1743             frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1744             p->exp += float64_params.exp_bias - float32_params.exp_bias;
1745         }
1746         break;
1747     case float_class_snan:
1748     case float_class_qnan:
1749         frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1750         p->exp = float64_params.exp_max;
1751         break;
1752     case float_class_inf:
1753         p->exp = float64_params.exp_max;
1754         break;
1755     case float_class_zero:
1756         break;
1757     default:
1758         g_assert_not_reached();
1759     }
1760 
1761     return float64_pack_raw(p);
1762 }
1763 
1764 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1765                                       float_status *s)
1766 {
1767     float128_unpack_raw(p, f);
1768     parts_canonicalize(p, s, &float128_params);
1769 }
1770 
1771 static float128 float128_round_pack_canonical(FloatParts128 *p,
1772                                               float_status *s)
1773 {
1774     parts_uncanon(p, s, &float128_params);
1775     return float128_pack_raw(p);
1776 }
1777 
1778 /* Returns false if the encoding is invalid. */
1779 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1780                                       float_status *s)
1781 {
1782     /* Ensure rounding precision is set before beginning. */
1783     switch (s->floatx80_rounding_precision) {
1784     case floatx80_precision_x:
1785     case floatx80_precision_d:
1786     case floatx80_precision_s:
1787         break;
1788     default:
1789         g_assert_not_reached();
1790     }
1791 
1792     if (unlikely(floatx80_invalid_encoding(f))) {
1793         float_raise(float_flag_invalid, s);
1794         return false;
1795     }
1796 
1797     floatx80_unpack_raw(p, f);
1798 
1799     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1800         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1801     } else {
1802         /* The explicit integer bit is ignored, after invalid checks. */
1803         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1804         p->cls = (p->frac_hi == 0 ? float_class_inf
1805                   : parts_is_snan_frac(p->frac_hi, s)
1806                   ? float_class_snan : float_class_qnan);
1807     }
1808     return true;
1809 }
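/*
 * Background note (hedged): the encodings rejected above are, on x86, the
 * "unnormal" patterns whose explicit integer bit is clear while the
 * exponent is nonzero, which also covers pseudo-infinities and
 * pseudo-NaNs.
 */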
1810 
1811 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1812                                               float_status *s)
1813 {
1814     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1815     uint64_t frac;
1816     int exp;
1817 
1818     switch (p->cls) {
1819     case float_class_normal:
1820         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1821             parts_uncanon_normal(p, s, fmt);
1822             frac = p->frac_hi;
1823             exp = p->exp;
1824         } else {
1825             FloatParts64 p64;
1826 
1827             p64.sign = p->sign;
1828             p64.exp = p->exp;
1829             frac_truncjam(&p64, p);
1830             parts_uncanon_normal(&p64, s, fmt);
1831             frac = p64.frac;
1832             exp = p64.exp;
1833         }
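        /*
         * frac/exp now hold the value rounded to 64, 53 or 24
         * significant bits per floatx80_rounding_precision, while
         * still using the wider floatx80 exponent range.
         */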
1834         if (exp != fmt->exp_max) {
1835             break;
1836         }
1837         /* rounded to inf -- fall through to set frac correctly */
1838 
1839     case float_class_inf:
1840         /* x86 and m68k differ in the setting of the integer bit. */
1841         frac = floatx80_infinity_low;
1842         exp = fmt->exp_max;
1843         break;
1844 
1845     case float_class_zero:
1846         frac = 0;
1847         exp = 0;
1848         break;
1849 
1850     case float_class_snan:
1851     case float_class_qnan:
1852         /* NaNs have the integer bit set. */
1853         frac = p->frac_hi | (1ull << 63);
1854         exp = fmt->exp_max;
1855         break;
1856 
1857     default:
1858         g_assert_not_reached();
1859     }
1860 
1861     return packFloatx80(p->sign, exp, frac);
1862 }
1863 
1864 /*
1865  * Addition and subtraction
1866  */
1867 
1868 static float16 QEMU_FLATTEN
1869 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1870 {
1871     FloatParts64 pa, pb, *pr;
1872 
1873     float16_unpack_canonical(&pa, a, status);
1874     float16_unpack_canonical(&pb, b, status);
1875     pr = parts_addsub(&pa, &pb, status, subtract);
1876 
1877     return float16_round_pack_canonical(pr, status);
1878 }
1879 
1880 float16 float16_add(float16 a, float16 b, float_status *status)
1881 {
1882     return float16_addsub(a, b, status, false);
1883 }
1884 
1885 float16 float16_sub(float16 a, float16 b, float_status *status)
1886 {
1887     return float16_addsub(a, b, status, true);
1888 }
1889 
1890 static float32 QEMU_SOFTFLOAT_ATTR
1891 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1892 {
1893     FloatParts64 pa, pb, *pr;
1894 
1895     float32_unpack_canonical(&pa, a, status);
1896     float32_unpack_canonical(&pb, b, status);
1897     pr = parts_addsub(&pa, &pb, status, subtract);
1898 
1899     return float32_round_pack_canonical(pr, status);
1900 }
1901 
1902 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1903 {
1904     return soft_f32_addsub(a, b, status, false);
1905 }
1906 
1907 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1908 {
1909     return soft_f32_addsub(a, b, status, true);
1910 }
1911 
1912 static float64 QEMU_SOFTFLOAT_ATTR
1913 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1914 {
1915     FloatParts64 pa, pb, *pr;
1916 
1917     float64_unpack_canonical(&pa, a, status);
1918     float64_unpack_canonical(&pb, b, status);
1919     pr = parts_addsub(&pa, &pb, status, subtract);
1920 
1921     return float64_round_pack_canonical(pr, status);
1922 }
1923 
1924 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1925 {
1926     return soft_f64_addsub(a, b, status, false);
1927 }
1928 
1929 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1930 {
1931     return soft_f64_addsub(a, b, status, true);
1932 }
1933 
1934 static float hard_f32_add(float a, float b)
1935 {
1936     return a + b;
1937 }
1938 
1939 static float hard_f32_sub(float a, float b)
1940 {
1941     return a - b;
1942 }
1943 
1944 static double hard_f64_add(double a, double b)
1945 {
1946     return a + b;
1947 }
1948 
1949 static double hard_f64_sub(double a, double b)
1950 {
1951     return a - b;
1952 }
1953 
1954 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1955 {
1956     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1957         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1958     }
1959     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1960 }
1961 
1962 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1963 {
1964     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1965         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1966     } else {
1967         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1968     }
1969 }
1970 
1971 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1972                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1973 {
1974     return float32_gen2(a, b, s, hard, soft,
1975                         f32_is_zon2, f32_addsubmul_post);
1976 }
1977 
1978 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1979                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1980 {
1981     return float64_gen2(a, b, s, hard, soft,
1982                         f64_is_zon2, f64_addsubmul_post);
1983 }
1984 
1985 float32 QEMU_FLATTEN
1986 float32_add(float32 a, float32 b, float_status *s)
1987 {
1988     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1989 }
1990 
1991 float32 QEMU_FLATTEN
1992 float32_sub(float32 a, float32 b, float_status *s)
1993 {
1994     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1995 }
1996 
1997 float64 QEMU_FLATTEN
1998 float64_add(float64 a, float64 b, float_status *s)
1999 {
2000     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
2001 }
2002 
2003 float64 QEMU_FLATTEN
2004 float64_sub(float64 a, float64 b, float_status *s)
2005 {
2006     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
2007 }
2008 
2009 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
2010                                  bool subtract)
2011 {
2012     FloatParts64 pa, pb, *pr;
2013 
2014     float64_unpack_canonical(&pa, a, status);
2015     float64_unpack_canonical(&pb, b, status);
2016     pr = parts_addsub(&pa, &pb, status, subtract);
2017 
2018     return float64r32_round_pack_canonical(pr, status);
2019 }
2020 
2021 float64 float64r32_add(float64 a, float64 b, float_status *status)
2022 {
2023     return float64r32_addsub(a, b, status, false);
2024 }
2025 
2026 float64 float64r32_sub(float64 a, float64 b, float_status *status)
2027 {
2028     return float64r32_addsub(a, b, status, true);
2029 }
2030 
2031 static bfloat16 QEMU_FLATTEN
2032 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2033 {
2034     FloatParts64 pa, pb, *pr;
2035 
2036     bfloat16_unpack_canonical(&pa, a, status);
2037     bfloat16_unpack_canonical(&pb, b, status);
2038     pr = parts_addsub(&pa, &pb, status, subtract);
2039 
2040     return bfloat16_round_pack_canonical(pr, status);
2041 }
2042 
2043 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2044 {
2045     return bfloat16_addsub(a, b, status, false);
2046 }
2047 
2048 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2049 {
2050     return bfloat16_addsub(a, b, status, true);
2051 }
2052 
2053 static float128 QEMU_FLATTEN
2054 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2055 {
2056     FloatParts128 pa, pb, *pr;
2057 
2058     float128_unpack_canonical(&pa, a, status);
2059     float128_unpack_canonical(&pb, b, status);
2060     pr = parts_addsub(&pa, &pb, status, subtract);
2061 
2062     return float128_round_pack_canonical(pr, status);
2063 }
2064 
2065 float128 float128_add(float128 a, float128 b, float_status *status)
2066 {
2067     return float128_addsub(a, b, status, false);
2068 }
2069 
2070 float128 float128_sub(float128 a, float128 b, float_status *status)
2071 {
2072     return float128_addsub(a, b, status, true);
2073 }
2074 
2075 static floatx80 QEMU_FLATTEN
2076 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2077 {
2078     FloatParts128 pa, pb, *pr;
2079 
2080     if (!floatx80_unpack_canonical(&pa, a, status) ||
2081         !floatx80_unpack_canonical(&pb, b, status)) {
2082         return floatx80_default_nan(status);
2083     }
2084 
2085     pr = parts_addsub(&pa, &pb, status, subtract);
2086     return floatx80_round_pack_canonical(pr, status);
2087 }
2088 
2089 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2090 {
2091     return floatx80_addsub(a, b, status, false);
2092 }
2093 
2094 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2095 {
2096     return floatx80_addsub(a, b, status, true);
2097 }
2098 
2099 /*
2100  * Multiplication
2101  */
2102 
2103 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2104 {
2105     FloatParts64 pa, pb, *pr;
2106 
2107     float16_unpack_canonical(&pa, a, status);
2108     float16_unpack_canonical(&pb, b, status);
2109     pr = parts_mul(&pa, &pb, status);
2110 
2111     return float16_round_pack_canonical(pr, status);
2112 }
2113 
2114 static float32 QEMU_SOFTFLOAT_ATTR
2115 soft_f32_mul(float32 a, float32 b, float_status *status)
2116 {
2117     FloatParts64 pa, pb, *pr;
2118 
2119     float32_unpack_canonical(&pa, a, status);
2120     float32_unpack_canonical(&pb, b, status);
2121     pr = parts_mul(&pa, &pb, status);
2122 
2123     return float32_round_pack_canonical(pr, status);
2124 }
2125 
2126 static float64 QEMU_SOFTFLOAT_ATTR
2127 soft_f64_mul(float64 a, float64 b, float_status *status)
2128 {
2129     FloatParts64 pa, pb, *pr;
2130 
2131     float64_unpack_canonical(&pa, a, status);
2132     float64_unpack_canonical(&pb, b, status);
2133     pr = parts_mul(&pa, &pb, status);
2134 
2135     return float64_round_pack_canonical(pr, status);
2136 }
2137 
2138 static float hard_f32_mul(float a, float b)
2139 {
2140     return a * b;
2141 }
2142 
2143 static double hard_f64_mul(double a, double b)
2144 {
2145     return a * b;
2146 }
2147 
2148 float32 QEMU_FLATTEN
2149 float32_mul(float32 a, float32 b, float_status *s)
2150 {
2151     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2152                         f32_is_zon2, f32_addsubmul_post);
2153 }
2154 
2155 float64 QEMU_FLATTEN
2156 float64_mul(float64 a, float64 b, float_status *s)
2157 {
2158     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2159                         f64_is_zon2, f64_addsubmul_post);
2160 }
2161 
2162 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2163 {
2164     FloatParts64 pa, pb, *pr;
2165 
2166     float64_unpack_canonical(&pa, a, status);
2167     float64_unpack_canonical(&pb, b, status);
2168     pr = parts_mul(&pa, &pb, status);
2169 
2170     return float64r32_round_pack_canonical(pr, status);
2171 }
2172 
2173 bfloat16 QEMU_FLATTEN
2174 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2175 {
2176     FloatParts64 pa, pb, *pr;
2177 
2178     bfloat16_unpack_canonical(&pa, a, status);
2179     bfloat16_unpack_canonical(&pb, b, status);
2180     pr = parts_mul(&pa, &pb, status);
2181 
2182     return bfloat16_round_pack_canonical(pr, status);
2183 }
2184 
2185 float128 QEMU_FLATTEN
2186 float128_mul(float128 a, float128 b, float_status *status)
2187 {
2188     FloatParts128 pa, pb, *pr;
2189 
2190     float128_unpack_canonical(&pa, a, status);
2191     float128_unpack_canonical(&pb, b, status);
2192     pr = parts_mul(&pa, &pb, status);
2193 
2194     return float128_round_pack_canonical(pr, status);
2195 }
2196 
2197 floatx80 QEMU_FLATTEN
2198 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2199 {
2200     FloatParts128 pa, pb, *pr;
2201 
2202     if (!floatx80_unpack_canonical(&pa, a, status) ||
2203         !floatx80_unpack_canonical(&pb, b, status)) {
2204         return floatx80_default_nan(status);
2205     }
2206 
2207     pr = parts_mul(&pa, &pb, status);
2208     return floatx80_round_pack_canonical(pr, status);
2209 }
2210 
2211 /*
2212  * Fused multiply-add
2213  */
2214 
2215 float16 QEMU_FLATTEN
2216 float16_muladd_scalbn(float16 a, float16 b, float16 c,
2217                       int scale, int flags, float_status *status)
2218 {
2219     FloatParts64 pa, pb, pc, *pr;
2220 
2221     float16_unpack_canonical(&pa, a, status);
2222     float16_unpack_canonical(&pb, b, status);
2223     float16_unpack_canonical(&pc, c, status);
2224     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2225 
2226     return float16_round_pack_canonical(pr, status);
2227 }
2228 
2229 float16 float16_muladd(float16 a, float16 b, float16 c,
2230                        int flags, float_status *status)
2231 {
2232     return float16_muladd_scalbn(a, b, c, 0, flags, status);
2233 }
2234 
2235 float32 QEMU_SOFTFLOAT_ATTR
2236 float32_muladd_scalbn(float32 a, float32 b, float32 c,
2237                       int scale, int flags, float_status *status)
2238 {
2239     FloatParts64 pa, pb, pc, *pr;
2240 
2241     float32_unpack_canonical(&pa, a, status);
2242     float32_unpack_canonical(&pb, b, status);
2243     float32_unpack_canonical(&pc, c, status);
2244     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2245 
2246     return float32_round_pack_canonical(pr, status);
2247 }
2248 
2249 float64 QEMU_SOFTFLOAT_ATTR
2250 float64_muladd_scalbn(float64 a, float64 b, float64 c,
2251                       int scale, int flags, float_status *status)
2252 {
2253     FloatParts64 pa, pb, pc, *pr;
2254 
2255     float64_unpack_canonical(&pa, a, status);
2256     float64_unpack_canonical(&pb, b, status);
2257     float64_unpack_canonical(&pc, c, status);
2258     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2259 
2260     return float64_round_pack_canonical(pr, status);
2261 }
2262 
2263 static bool force_soft_fma;
2264 
2265 float32 QEMU_FLATTEN
2266 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2267 {
2268     union_float32 ua, ub, uc, ur;
2269 
2270     ua.s = xa;
2271     ub.s = xb;
2272     uc.s = xc;
2273 
2274     if (unlikely(!can_use_fpu(s))) {
2275         goto soft;
2276     }
2277 
2278     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2279     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2280         goto soft;
2281     }
2282 
2283     if (unlikely(force_soft_fma)) {
2284         goto soft;
2285     }
2286 
2287     /*
2288      * When a == 0 or b == 0, there's no need to check for under/overflow,
2289      * since we know the addend is (normal || 0) and the product is 0.
2290      */
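    /*
     * The sign of the zero product still matters: e.g. for a = -0.0f,
     * b = 1.0f, c = +0.0f the product is -0.0f, and -0.0f + +0.0f is
     * +0.0f when rounding to nearest but -0.0f when rounding toward
     * -inf, so we build the signed zero and let the host add decide.
     */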
2291     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2292         union_float32 up;
2293         bool prod_sign;
2294 
2295         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2296         prod_sign ^= !!(flags & float_muladd_negate_product);
2297         up.s = float32_set_sign(float32_zero, prod_sign);
2298 
2299         if (flags & float_muladd_negate_c) {
2300             uc.h = -uc.h;
2301         }
2302         ur.h = up.h + uc.h;
2303     } else {
2304         union_float32 ua_orig = ua;
2305         union_float32 uc_orig = uc;
2306 
2307         if (flags & float_muladd_negate_product) {
2308             ua.h = -ua.h;
2309         }
2310         if (flags & float_muladd_negate_c) {
2311             uc.h = -uc.h;
2312         }
2313 
2314         ur.h = fmaf(ua.h, ub.h, uc.h);
2315 
2316         if (unlikely(f32_is_inf(ur))) {
2317             float_raise(float_flag_overflow, s);
2318         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2319             ua = ua_orig;
2320             uc = uc_orig;
2321             goto soft;
2322         }
2323     }
2324     if (flags & float_muladd_negate_result) {
2325         return float32_chs(ur.s);
2326     }
2327     return ur.s;
2328 
2329  soft:
2330     return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
2331 }
2332 
2333 float64 QEMU_FLATTEN
2334 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2335 {
2336     union_float64 ua, ub, uc, ur;
2337 
2338     ua.s = xa;
2339     ub.s = xb;
2340     uc.s = xc;
2341 
2342     if (unlikely(!can_use_fpu(s))) {
2343         goto soft;
2344     }
2345 
2346     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2347     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2348         goto soft;
2349     }
2350 
2351     if (unlikely(force_soft_fma)) {
2352         goto soft;
2353     }
2354 
2355     /*
2356      * When a == 0 or b == 0, there's no need to check for under/overflow,
2357      * since we know the addend is (normal || 0) and the product is 0.
2358      */
2359     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2360         union_float64 up;
2361         bool prod_sign;
2362 
2363         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2364         prod_sign ^= !!(flags & float_muladd_negate_product);
2365         up.s = float64_set_sign(float64_zero, prod_sign);
2366 
2367         if (flags & float_muladd_negate_c) {
2368             uc.h = -uc.h;
2369         }
2370         ur.h = up.h + uc.h;
2371     } else {
2372         union_float64 ua_orig = ua;
2373         union_float64 uc_orig = uc;
2374 
2375         if (flags & float_muladd_negate_product) {
2376             ua.h = -ua.h;
2377         }
2378         if (flags & float_muladd_negate_c) {
2379             uc.h = -uc.h;
2380         }
2381 
2382         ur.h = fma(ua.h, ub.h, uc.h);
2383 
2384         if (unlikely(f64_is_inf(ur))) {
2385             float_raise(float_flag_overflow, s);
2386         } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
2387             ua = ua_orig;
2388             uc = uc_orig;
2389             goto soft;
2390         }
2391     }
2392     if (flags & float_muladd_negate_result) {
2393         return float64_chs(ur.s);
2394     }
2395     return ur.s;
2396 
2397  soft:
2398     return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
2399 }
2400 
2401 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2402                           int flags, float_status *status)
2403 {
2404     FloatParts64 pa, pb, pc, *pr;
2405 
2406     float64_unpack_canonical(&pa, a, status);
2407     float64_unpack_canonical(&pb, b, status);
2408     float64_unpack_canonical(&pc, c, status);
2409     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2410 
2411     return float64r32_round_pack_canonical(pr, status);
2412 }
2413 
2414 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2415                                       int flags, float_status *status)
2416 {
2417     FloatParts64 pa, pb, pc, *pr;
2418 
2419     bfloat16_unpack_canonical(&pa, a, status);
2420     bfloat16_unpack_canonical(&pb, b, status);
2421     bfloat16_unpack_canonical(&pc, c, status);
2422     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2423 
2424     return bfloat16_round_pack_canonical(pr, status);
2425 }
2426 
2427 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2428                                       int flags, float_status *status)
2429 {
2430     FloatParts128 pa, pb, pc, *pr;
2431 
2432     float128_unpack_canonical(&pa, a, status);
2433     float128_unpack_canonical(&pb, b, status);
2434     float128_unpack_canonical(&pc, c, status);
2435     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2436 
2437     return float128_round_pack_canonical(pr, status);
2438 }
2439 
2440 /*
2441  * Division
2442  */
2443 
2444 float16 float16_div(float16 a, float16 b, float_status *status)
2445 {
2446     FloatParts64 pa, pb, *pr;
2447 
2448     float16_unpack_canonical(&pa, a, status);
2449     float16_unpack_canonical(&pb, b, status);
2450     pr = parts_div(&pa, &pb, status);
2451 
2452     return float16_round_pack_canonical(pr, status);
2453 }
2454 
2455 static float32 QEMU_SOFTFLOAT_ATTR
2456 soft_f32_div(float32 a, float32 b, float_status *status)
2457 {
2458     FloatParts64 pa, pb, *pr;
2459 
2460     float32_unpack_canonical(&pa, a, status);
2461     float32_unpack_canonical(&pb, b, status);
2462     pr = parts_div(&pa, &pb, status);
2463 
2464     return float32_round_pack_canonical(pr, status);
2465 }
2466 
2467 static float64 QEMU_SOFTFLOAT_ATTR
2468 soft_f64_div(float64 a, float64 b, float_status *status)
2469 {
2470     FloatParts64 pa, pb, *pr;
2471 
2472     float64_unpack_canonical(&pa, a, status);
2473     float64_unpack_canonical(&pb, b, status);
2474     pr = parts_div(&pa, &pb, status);
2475 
2476     return float64_round_pack_canonical(pr, status);
2477 }
2478 
2479 static float hard_f32_div(float a, float b)
2480 {
2481     return a / b;
2482 }
2483 
2484 static double hard_f64_div(double a, double b)
2485 {
2486     return a / b;
2487 }
2488 
2489 static bool f32_div_pre(union_float32 a, union_float32 b)
2490 {
2491     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2492         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2493                fpclassify(b.h) == FP_NORMAL;
2494     }
2495     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2496 }
2497 
2498 static bool f64_div_pre(union_float64 a, union_float64 b)
2499 {
2500     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2501         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2502                fpclassify(b.h) == FP_NORMAL;
2503     }
2504     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2505 }
2506 
2507 static bool f32_div_post(union_float32 a, union_float32 b)
2508 {
2509     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2510         return fpclassify(a.h) != FP_ZERO;
2511     }
2512     return !float32_is_zero(a.s);
2513 }
2514 
2515 static bool f64_div_post(union_float64 a, union_float64 b)
2516 {
2517     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2518         return fpclassify(a.h) != FP_ZERO;
2519     }
2520     return !float64_is_zero(a.s);
2521 }
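/*
 * Rationale sketch (as far as these predicates go): "pre" admits only a
 * zero-or-normal dividend and a strictly normal divisor, keeping the
 * host division away from NaN/Inf/subnormal inputs and division by
 * zero; "post" rejects zero results, where an exact zero cannot be told
 * apart from one that underflowed, forcing the soft path instead.
 */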
2522 
2523 float32 QEMU_FLATTEN
2524 float32_div(float32 a, float32 b, float_status *s)
2525 {
2526     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2527                         f32_div_pre, f32_div_post);
2528 }
2529 
2530 float64 QEMU_FLATTEN
2531 float64_div(float64 a, float64 b, float_status *s)
2532 {
2533     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2534                         f64_div_pre, f64_div_post);
2535 }
2536 
2537 float64 float64r32_div(float64 a, float64 b, float_status *status)
2538 {
2539     FloatParts64 pa, pb, *pr;
2540 
2541     float64_unpack_canonical(&pa, a, status);
2542     float64_unpack_canonical(&pb, b, status);
2543     pr = parts_div(&pa, &pb, status);
2544 
2545     return float64r32_round_pack_canonical(pr, status);
2546 }
2547 
2548 bfloat16 QEMU_FLATTEN
2549 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2550 {
2551     FloatParts64 pa, pb, *pr;
2552 
2553     bfloat16_unpack_canonical(&pa, a, status);
2554     bfloat16_unpack_canonical(&pb, b, status);
2555     pr = parts_div(&pa, &pb, status);
2556 
2557     return bfloat16_round_pack_canonical(pr, status);
2558 }
2559 
2560 float128 QEMU_FLATTEN
2561 float128_div(float128 a, float128 b, float_status *status)
2562 {
2563     FloatParts128 pa, pb, *pr;
2564 
2565     float128_unpack_canonical(&pa, a, status);
2566     float128_unpack_canonical(&pb, b, status);
2567     pr = parts_div(&pa, &pb, status);
2568 
2569     return float128_round_pack_canonical(pr, status);
2570 }
2571 
2572 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2573 {
2574     FloatParts128 pa, pb, *pr;
2575 
2576     if (!floatx80_unpack_canonical(&pa, a, status) ||
2577         !floatx80_unpack_canonical(&pb, b, status)) {
2578         return floatx80_default_nan(status);
2579     }
2580 
2581     pr = parts_div(&pa, &pb, status);
2582     return floatx80_round_pack_canonical(pr, status);
2583 }
2584 
2585 /*
2586  * Remainder
2587  */
2588 
2589 float32 float32_rem(float32 a, float32 b, float_status *status)
2590 {
2591     FloatParts64 pa, pb, *pr;
2592 
2593     float32_unpack_canonical(&pa, a, status);
2594     float32_unpack_canonical(&pb, b, status);
2595     pr = parts_modrem(&pa, &pb, NULL, status);
2596 
2597     return float32_round_pack_canonical(pr, status);
2598 }
2599 
2600 float64 float64_rem(float64 a, float64 b, float_status *status)
2601 {
2602     FloatParts64 pa, pb, *pr;
2603 
2604     float64_unpack_canonical(&pa, a, status);
2605     float64_unpack_canonical(&pb, b, status);
2606     pr = parts_modrem(&pa, &pb, NULL, status);
2607 
2608     return float64_round_pack_canonical(pr, status);
2609 }
2610 
2611 float128 float128_rem(float128 a, float128 b, float_status *status)
2612 {
2613     FloatParts128 pa, pb, *pr;
2614 
2615     float128_unpack_canonical(&pa, a, status);
2616     float128_unpack_canonical(&pb, b, status);
2617     pr = parts_modrem(&pa, &pb, NULL, status);
2618 
2619     return float128_round_pack_canonical(pr, status);
2620 }
2621 
2622 /*
2623  * Returns the remainder of the extended double-precision floating-point value
2624  * `a' with respect to the corresponding value `b'.
2625  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2626  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2627  * the remainder based on truncating the quotient toward zero instead and
2628  * *quotient is set to the low 64 bits of the absolute value of the integer
2629  * quotient.
2630  */
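/*
 * For example: with a = 7.5 and b = 2.0 the IEEE remainder (mod false)
 * rounds the quotient 3.75 to nearest, 4, and returns -0.5, while the
 * truncating form (mod true) uses 3 and returns 1.5 with *quotient = 3.
 */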
2631 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2632                          uint64_t *quotient, float_status *status)
2633 {
2634     FloatParts128 pa, pb, *pr;
2635 
2636     *quotient = 0;
2637     if (!floatx80_unpack_canonical(&pa, a, status) ||
2638         !floatx80_unpack_canonical(&pb, b, status)) {
2639         return floatx80_default_nan(status);
2640     }
2641     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2642 
2643     return floatx80_round_pack_canonical(pr, status);
2644 }
2645 
2646 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2647 {
2648     uint64_t quotient;
2649     return floatx80_modrem(a, b, false, &quotient, status);
2650 }
2651 
2652 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2653 {
2654     uint64_t quotient;
2655     return floatx80_modrem(a, b, true, &quotient, status);
2656 }
2657 
2658 /*
2659  * Float to Float conversions
2660  *
2661  * Returns the result of converting one float format to another. The
2662  * conversion is performed according to the IEC/IEEE Standard for
2663  * Binary Floating-Point Arithmetic.
2664  *
2665  * Usually this only needs to take care of raising invalid exceptions
2666  * and handling the conversion of NaNs.
2667  */
2668 
2669 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2670 {
2671     switch (a->cls) {
2672     case float_class_snan:
2673         float_raise(float_flag_invalid_snan, s);
2674         /* fall through */
2675     case float_class_qnan:
2676         /*
2677          * There is no NaN in the destination format.  Raise Invalid
2678          * and return a zero with the sign of the input NaN.
2679          */
2680         float_raise(float_flag_invalid, s);
2681         a->cls = float_class_zero;
2682         break;
2683 
2684     case float_class_inf:
2685         /*
2686          * There is no Inf in the destination format.  Raise Invalid
2687          * and return the maximum normal with the correct sign.
2688          */
2689         float_raise(float_flag_invalid, s);
2690         a->cls = float_class_normal;
2691         a->exp = float16_params_ahp.exp_max;
2692         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2693                                   float16_params_ahp.frac_size + 1);
2694         break;
2695 
2696     case float_class_normal:
2697     case float_class_zero:
2698         break;
2699 
2700     default:
2701         g_assert_not_reached();
2702     }
2703 }
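/*
 * Note: the AHP layout handled above is ARM's "alternative half
 * precision", which spends the all-ones exponent on extra normal range
 * instead of Inf/NaN encodings; hence both classes must be converted
 * away here.
 */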
2704 
2705 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2706 {
2707     if (is_nan(a->cls)) {
2708         parts_return_nan(a, s);
2709     }
2710 }
2711 
2712 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2713 {
2714     if (is_nan(a->cls)) {
2715         parts_return_nan(a, s);
2716     }
2717 }
2718 
2719 #define parts_float_to_float(P, S) \
2720     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2721 
2722 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2723                                         float_status *s)
2724 {
2725     a->cls = b->cls;
2726     a->sign = b->sign;
2727     a->exp = b->exp;
2728 
2729     if (a->cls == float_class_normal) {
2730         frac_truncjam(a, b);
2731     } else if (is_nan(a->cls)) {
2732         /* Discard the low bits of the NaN. */
2733         a->frac = b->frac_hi;
2734         parts_return_nan(a, s);
2735     }
2736 }
2737 
2738 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2739                                        float_status *s)
2740 {
2741     a->cls = b->cls;
2742     a->sign = b->sign;
2743     a->exp = b->exp;
2744     frac_widen(a, b);
2745 
2746     if (is_nan(a->cls)) {
2747         parts_return_nan(a, s);
2748     }
2749 }
2750 
2751 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2752 {
2753     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2754     FloatParts64 p;
2755 
2756     float16a_unpack_canonical(&p, a, s, fmt16);
2757     parts_float_to_float(&p, s);
2758     return float32_round_pack_canonical(&p, s);
2759 }
2760 
2761 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2762 {
2763     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2764     FloatParts64 p;
2765 
2766     float16a_unpack_canonical(&p, a, s, fmt16);
2767     parts_float_to_float(&p, s);
2768     return float64_round_pack_canonical(&p, s);
2769 }
2770 
2771 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2772 {
2773     FloatParts64 p;
2774     const FloatFmt *fmt;
2775 
2776     float32_unpack_canonical(&p, a, s);
2777     if (ieee) {
2778         parts_float_to_float(&p, s);
2779         fmt = &float16_params;
2780     } else {
2781         parts_float_to_ahp(&p, s);
2782         fmt = &float16_params_ahp;
2783     }
2784     return float16a_round_pack_canonical(&p, s, fmt);
2785 }
2786 
2787 static float64 QEMU_SOFTFLOAT_ATTR
2788 soft_float32_to_float64(float32 a, float_status *s)
2789 {
2790     FloatParts64 p;
2791 
2792     float32_unpack_canonical(&p, a, s);
2793     parts_float_to_float(&p, s);
2794     return float64_round_pack_canonical(&p, s);
2795 }
2796 
2797 float64 float32_to_float64(float32 a, float_status *s)
2798 {
2799     if (likely(float32_is_normal(a))) {
2800         /* Widening conversion can never produce inexact results.  */
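        /*
         * Every float32 normal is exactly representable: the 24-bit
         * significand and 8-bit exponent both embed into float64's
         * 53 and 11, so the host cast below is bit-exact.
         */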
2801         union_float32 uf;
2802         union_float64 ud;
2803         uf.s = a;
2804         ud.h = uf.h;
2805         return ud.s;
2806     } else if (float32_is_zero(a)) {
2807         return float64_set_sign(float64_zero, float32_is_neg(a));
2808     } else {
2809         return soft_float32_to_float64(a, s);
2810     }
2811 }
2812 
2813 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2814 {
2815     FloatParts64 p;
2816     const FloatFmt *fmt;
2817 
2818     float64_unpack_canonical(&p, a, s);
2819     if (ieee) {
2820         parts_float_to_float(&p, s);
2821         fmt = &float16_params;
2822     } else {
2823         parts_float_to_ahp(&p, s);
2824         fmt = &float16_params_ahp;
2825     }
2826     return float16a_round_pack_canonical(&p, s, fmt);
2827 }
2828 
2829 float32 float64_to_float32(float64 a, float_status *s)
2830 {
2831     FloatParts64 p;
2832 
2833     float64_unpack_canonical(&p, a, s);
2834     parts_float_to_float(&p, s);
2835     return float32_round_pack_canonical(&p, s);
2836 }
2837 
2838 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2839 {
2840     FloatParts64 p;
2841 
2842     bfloat16_unpack_canonical(&p, a, s);
2843     parts_float_to_float(&p, s);
2844     return float32_round_pack_canonical(&p, s);
2845 }
2846 
2847 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2848 {
2849     FloatParts64 p;
2850 
2851     bfloat16_unpack_canonical(&p, a, s);
2852     parts_float_to_float(&p, s);
2853     return float64_round_pack_canonical(&p, s);
2854 }
2855 
2856 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2857 {
2858     FloatParts64 p;
2859 
2860     float32_unpack_canonical(&p, a, s);
2861     parts_float_to_float(&p, s);
2862     return bfloat16_round_pack_canonical(&p, s);
2863 }
2864 
2865 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2866 {
2867     FloatParts64 p;
2868 
2869     float64_unpack_canonical(&p, a, s);
2870     parts_float_to_float(&p, s);
2871     return bfloat16_round_pack_canonical(&p, s);
2872 }
2873 
2874 float32 float128_to_float32(float128 a, float_status *s)
2875 {
2876     FloatParts64 p64;
2877     FloatParts128 p128;
2878 
2879     float128_unpack_canonical(&p128, a, s);
2880     parts_float_to_float_narrow(&p64, &p128, s);
2881     return float32_round_pack_canonical(&p64, s);
2882 }
2883 
2884 float64 float128_to_float64(float128 a, float_status *s)
2885 {
2886     FloatParts64 p64;
2887     FloatParts128 p128;
2888 
2889     float128_unpack_canonical(&p128, a, s);
2890     parts_float_to_float_narrow(&p64, &p128, s);
2891     return float64_round_pack_canonical(&p64, s);
2892 }
2893 
2894 float128 float32_to_float128(float32 a, float_status *s)
2895 {
2896     FloatParts64 p64;
2897     FloatParts128 p128;
2898 
2899     float32_unpack_canonical(&p64, a, s);
2900     parts_float_to_float_widen(&p128, &p64, s);
2901     return float128_round_pack_canonical(&p128, s);
2902 }
2903 
2904 float128 float64_to_float128(float64 a, float_status *s)
2905 {
2906     FloatParts64 p64;
2907     FloatParts128 p128;
2908 
2909     float64_unpack_canonical(&p64, a, s);
2910     parts_float_to_float_widen(&p128, &p64, s);
2911     return float128_round_pack_canonical(&p128, s);
2912 }
2913 
2914 float32 floatx80_to_float32(floatx80 a, float_status *s)
2915 {
2916     FloatParts64 p64;
2917     FloatParts128 p128;
2918 
2919     if (floatx80_unpack_canonical(&p128, a, s)) {
2920         parts_float_to_float_narrow(&p64, &p128, s);
2921     } else {
2922         parts_default_nan(&p64, s);
2923     }
2924     return float32_round_pack_canonical(&p64, s);
2925 }
2926 
2927 float64 floatx80_to_float64(floatx80 a, float_status *s)
2928 {
2929     FloatParts64 p64;
2930     FloatParts128 p128;
2931 
2932     if (floatx80_unpack_canonical(&p128, a, s)) {
2933         parts_float_to_float_narrow(&p64, &p128, s);
2934     } else {
2935         parts_default_nan(&p64, s);
2936     }
2937     return float64_round_pack_canonical(&p64, s);
2938 }
2939 
2940 float128 floatx80_to_float128(floatx80 a, float_status *s)
2941 {
2942     FloatParts128 p;
2943 
2944     if (floatx80_unpack_canonical(&p, a, s)) {
2945         parts_float_to_float(&p, s);
2946     } else {
2947         parts_default_nan(&p, s);
2948     }
2949     return float128_round_pack_canonical(&p, s);
2950 }
2951 
2952 floatx80 float32_to_floatx80(float32 a, float_status *s)
2953 {
2954     FloatParts64 p64;
2955     FloatParts128 p128;
2956 
2957     float32_unpack_canonical(&p64, a, s);
2958     parts_float_to_float_widen(&p128, &p64, s);
2959     return floatx80_round_pack_canonical(&p128, s);
2960 }
2961 
2962 floatx80 float64_to_floatx80(float64 a, float_status *s)
2963 {
2964     FloatParts64 p64;
2965     FloatParts128 p128;
2966 
2967     float64_unpack_canonical(&p64, a, s);
2968     parts_float_to_float_widen(&p128, &p64, s);
2969     return floatx80_round_pack_canonical(&p128, s);
2970 }
2971 
2972 floatx80 float128_to_floatx80(float128 a, float_status *s)
2973 {
2974     FloatParts128 p;
2975 
2976     float128_unpack_canonical(&p, a, s);
2977     parts_float_to_float(&p, s);
2978     return floatx80_round_pack_canonical(&p, s);
2979 }
2980 
2981 /*
2982  * Round to integral value
2983  */
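/*
 * For example, with float_round_nearest_even ties go to the even
 * neighbour: float64_round_to_int(2.5) == 2.0 while
 * float64_round_to_int(3.5) == 4.0.
 */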
2984 
2985 float16 float16_round_to_int(float16 a, float_status *s)
2986 {
2987     FloatParts64 p;
2988 
2989     float16_unpack_canonical(&p, a, s);
2990     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2991     return float16_round_pack_canonical(&p, s);
2992 }
2993 
2994 float32 float32_round_to_int(float32 a, float_status *s)
2995 {
2996     FloatParts64 p;
2997 
2998     float32_unpack_canonical(&p, a, s);
2999     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
3000     return float32_round_pack_canonical(&p, s);
3001 }
3002 
3003 float64 float64_round_to_int(float64 a, float_status *s)
3004 {
3005     FloatParts64 p;
3006 
3007     float64_unpack_canonical(&p, a, s);
3008     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
3009     return float64_round_pack_canonical(&p, s);
3010 }
3011 
3012 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
3013 {
3014     FloatParts64 p;
3015 
3016     bfloat16_unpack_canonical(&p, a, s);
3017     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
3018     return bfloat16_round_pack_canonical(&p, s);
3019 }
3020 
3021 float128 float128_round_to_int(float128 a, float_status *s)
3022 {
3023     FloatParts128 p;
3024 
3025     float128_unpack_canonical(&p, a, s);
3026     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3027     return float128_round_pack_canonical(&p, s);
3028 }
3029 
3030 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3031 {
3032     FloatParts128 p;
3033 
3034     if (!floatx80_unpack_canonical(&p, a, status)) {
3035         return floatx80_default_nan(status);
3036     }
3037 
3038     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3039                        &floatx80_params[status->floatx80_rounding_precision]);
3040     return floatx80_round_pack_canonical(&p, status);
3041 }
3042 
3043 /*
3044  * Floating-point to signed integer conversions
3045  */
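/*
 * The *_scalbn variants convert a * 2**scale, i.e. scale is applied to
 * the exponent before rounding; out-of-range values saturate to the
 * given [min, max] bounds and raise float_flag_invalid.
 */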
3046 
3047 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3048                               float_status *s)
3049 {
3050     FloatParts64 p;
3051 
3052     float16_unpack_canonical(&p, a, s);
3053     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3054 }
3055 
3056 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3057                                 float_status *s)
3058 {
3059     FloatParts64 p;
3060 
3061     float16_unpack_canonical(&p, a, s);
3062     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3063 }
3064 
3065 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3066                                 float_status *s)
3067 {
3068     FloatParts64 p;
3069 
3070     float16_unpack_canonical(&p, a, s);
3071     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3072 }
3073 
3074 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3075                                 float_status *s)
3076 {
3077     FloatParts64 p;
3078 
3079     float16_unpack_canonical(&p, a, s);
3080     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3081 }
3082 
3083 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3084                                 float_status *s)
3085 {
3086     FloatParts64 p;
3087 
3088     float32_unpack_canonical(&p, a, s);
3089     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3090 }
3091 
3092 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3093                                 float_status *s)
3094 {
3095     FloatParts64 p;
3096 
3097     float32_unpack_canonical(&p, a, s);
3098     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3099 }
3100 
3101 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3102                                 float_status *s)
3103 {
3104     FloatParts64 p;
3105 
3106     float32_unpack_canonical(&p, a, s);
3107     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3108 }
3109 
3110 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3111                                 float_status *s)
3112 {
3113     FloatParts64 p;
3114 
3115     float64_unpack_canonical(&p, a, s);
3116     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3117 }
3118 
3119 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3120                                 float_status *s)
3121 {
3122     FloatParts64 p;
3123 
3124     float64_unpack_canonical(&p, a, s);
3125     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3126 }
3127 
3128 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3129                                 float_status *s)
3130 {
3131     FloatParts64 p;
3132 
3133     float64_unpack_canonical(&p, a, s);
3134     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3135 }
3136 
3137 int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3138                                float_status *s)
3139 {
3140     FloatParts64 p;
3141 
3142     bfloat16_unpack_canonical(&p, a, s);
3143     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3144 }
3145 
3146 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3147                                  float_status *s)
3148 {
3149     FloatParts64 p;
3150 
3151     bfloat16_unpack_canonical(&p, a, s);
3152     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3153 }
3154 
3155 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3156                                  float_status *s)
3157 {
3158     FloatParts64 p;
3159 
3160     bfloat16_unpack_canonical(&p, a, s);
3161     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3162 }
3163 
3164 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3165                                  float_status *s)
3166 {
3167     FloatParts64 p;
3168 
3169     bfloat16_unpack_canonical(&p, a, s);
3170     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3171 }
3172 
3173 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3174                                         int scale, float_status *s)
3175 {
3176     FloatParts128 p;
3177 
3178     float128_unpack_canonical(&p, a, s);
3179     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3180 }
3181 
3182 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3183                                         int scale, float_status *s)
3184 {
3185     FloatParts128 p;
3186 
3187     float128_unpack_canonical(&p, a, s);
3188     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3189 }
3190 
3191 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3192                                         int scale, float_status *s)
3193 {
3194     int flags = 0;
3195     Int128 r;
3196     FloatParts128 p;
3197 
3198     float128_unpack_canonical(&p, a, s);
3199 
3200     switch (p.cls) {
3201     case float_class_snan:
3202         flags |= float_flag_invalid_snan;
3203         /* fall through */
3204     case float_class_qnan:
3205         flags |= float_flag_invalid;
3206         r = UINT128_MAX;
3207         break;
3208 
3209     case float_class_inf:
3210         flags = float_flag_invalid | float_flag_invalid_cvti;
3211         r = p.sign ? INT128_MIN : INT128_MAX;
3212         break;
3213 
3214     case float_class_zero:
3215         return int128_zero();
3216 
3217     case float_class_normal:
3218         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3219             flags = float_flag_inexact;
3220         }
3221 
3222         if (p.exp < 127) {
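            /*
             * The canonical fraction keeps the implicit bit in bit 63 of
             * frac_hi, i.e. bit 127 of the assembled Int128, so a value
             * with exponent p.exp needs a right shift of 127 - p.exp.
             */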
3223             int shift = 127 - p.exp;
3224             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3225             if (p.sign) {
3226                 r = int128_neg(r);
3227             }
3228         } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3229                    p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3230             r = INT128_MIN;
3231         } else {
3232             flags = float_flag_invalid | float_flag_invalid_cvti;
3233             r = p.sign ? INT128_MIN : INT128_MAX;
3234         }
3235         break;
3236 
3237     default:
3238         g_assert_not_reached();
3239     }
3240 
3241     float_raise(flags, s);
3242     return r;
3243 }
3244 
3245 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3246                                         int scale, float_status *s)
3247 {
3248     FloatParts128 p;
3249 
3250     if (!floatx80_unpack_canonical(&p, a, s)) {
3251         parts_default_nan(&p, s);
3252     }
3253     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3254 }
3255 
3256 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3257                                         int scale, float_status *s)
3258 {
3259     FloatParts128 p;
3260 
3261     if (!floatx80_unpack_canonical(&p, a, s)) {
3262         parts_default_nan(&p, s);
3263     }
3264     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3265 }
3266 
3267 int8_t float16_to_int8(float16 a, float_status *s)
3268 {
3269     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3270 }
3271 
3272 int16_t float16_to_int16(float16 a, float_status *s)
3273 {
3274     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3275 }
3276 
3277 int32_t float16_to_int32(float16 a, float_status *s)
3278 {
3279     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3280 }
3281 
3282 int64_t float16_to_int64(float16 a, float_status *s)
3283 {
3284     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3285 }
3286 
3287 int16_t float32_to_int16(float32 a, float_status *s)
3288 {
3289     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3290 }
3291 
3292 int32_t float32_to_int32(float32 a, float_status *s)
3293 {
3294     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3295 }
3296 
3297 int64_t float32_to_int64(float32 a, float_status *s)
3298 {
3299     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3300 }
3301 
3302 int16_t float64_to_int16(float64 a, float_status *s)
3303 {
3304     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3305 }
3306 
3307 int32_t float64_to_int32(float64 a, float_status *s)
3308 {
3309     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3310 }
3311 
3312 int64_t float64_to_int64(float64 a, float_status *s)
3313 {
3314     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3315 }
3316 
3317 int32_t float128_to_int32(float128 a, float_status *s)
3318 {
3319     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3320 }
3321 
3322 int64_t float128_to_int64(float128 a, float_status *s)
3323 {
3324     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3325 }
3326 
3327 Int128 float128_to_int128(float128 a, float_status *s)
3328 {
3329     return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3330 }
3331 
3332 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3333 {
3334     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3335 }
3336 
3337 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3338 {
3339     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3340 }
3341 
3342 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3343 {
3344     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3345 }
3346 
3347 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3348 {
3349     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3350 }
3351 
3352 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3353 {
3354     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3355 }
3356 
3357 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3358 {
3359     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3360 }
3361 
3362 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3363 {
3364     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3365 }
3366 
3367 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3368 {
3369     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3370 }
3371 
3372 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3373 {
3374     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3375 }
3376 
3377 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3378 {
3379     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3380 }
3381 
3382 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3383 {
3384     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3385 }
3386 
3387 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3388 {
3389     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3390 }
3391 
3392 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3393 {
3394     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3395 }
3396 
3397 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3398 {
3399     return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3400 }
3401 
3402 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3403 {
3404     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3405 }
3406 
3407 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3408 {
3409     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3410 }
3411 
3412 int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
3413 {
3414     return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3415 }
3416 
3417 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3418 {
3419     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3420 }
3421 
3422 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3423 {
3424     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3425 }
3426 
3427 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3428 {
3429     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3430 }
3431 
3432 int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
3433 {
3434     return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
3435 }
3436 
3437 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3438 {
3439     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3440 }
3441 
3442 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3443 {
3444     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3445 }
3446 
3447 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3448 {
3449     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3450 }
3451 
3452 int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
3453                                 float_status *s)
3454 {
3455     FloatParts64 p;
3456 
3457     float64_unpack_canonical(&p, a, s);
3458     return parts_float_to_sint_modulo(&p, rmode, 31, s);
3459 }
3460 
3461 int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
3462                                 float_status *s)
3463 {
3464     FloatParts64 p;
3465 
3466     float64_unpack_canonical(&p, a, s);
3467     return parts_float_to_sint_modulo(&p, rmode, 63, s);
3468 }
3469 
3470 /*
3471  * Floating-point to unsigned integer conversions
3472  */
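/*
 * As with the signed versions, the *_scalbn() helpers convert
 * a * 2**scale; e.g. (illustrative) converting the float64 value 2.5
 * with scale = 2 yields 10.  Negative or otherwise out-of-range values
 * saturate and raise the invalid exception.
 */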
3473 
3474 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3475                                 float_status *s)
3476 {
3477     FloatParts64 p;
3478 
3479     float16_unpack_canonical(&p, a, s);
3480     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3481 }
3482 
3483 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3484                                   float_status *s)
3485 {
3486     FloatParts64 p;
3487 
3488     float16_unpack_canonical(&p, a, s);
3489     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3490 }
3491 
3492 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3493                                   float_status *s)
3494 {
3495     FloatParts64 p;
3496 
3497     float16_unpack_canonical(&p, a, s);
3498     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3499 }
3500 
3501 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3502                                   float_status *s)
3503 {
3504     FloatParts64 p;
3505 
3506     float16_unpack_canonical(&p, a, s);
3507     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3508 }
3509 
3510 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3511                                   float_status *s)
3512 {
3513     FloatParts64 p;
3514 
3515     float32_unpack_canonical(&p, a, s);
3516     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3517 }
3518 
3519 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3520                                   float_status *s)
3521 {
3522     FloatParts64 p;
3523 
3524     float32_unpack_canonical(&p, a, s);
3525     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3526 }
3527 
3528 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3529                                   float_status *s)
3530 {
3531     FloatParts64 p;
3532 
3533     float32_unpack_canonical(&p, a, s);
3534     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3535 }
3536 
3537 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3538                                   float_status *s)
3539 {
3540     FloatParts64 p;
3541 
3542     float64_unpack_canonical(&p, a, s);
3543     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3544 }
3545 
3546 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3547                                   float_status *s)
3548 {
3549     FloatParts64 p;
3550 
3551     float64_unpack_canonical(&p, a, s);
3552     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3553 }
3554 
3555 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3556                                   float_status *s)
3557 {
3558     FloatParts64 p;
3559 
3560     float64_unpack_canonical(&p, a, s);
3561     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3562 }
3563 
3564 uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
3565                                  int scale, float_status *s)
3566 {
3567     FloatParts64 p;
3568 
3569     bfloat16_unpack_canonical(&p, a, s);
3570     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3571 }
3572 
3573 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3574                                    int scale, float_status *s)
3575 {
3576     FloatParts64 p;
3577 
3578     bfloat16_unpack_canonical(&p, a, s);
3579     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3580 }
3581 
3582 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3583                                    int scale, float_status *s)
3584 {
3585     FloatParts64 p;
3586 
3587     bfloat16_unpack_canonical(&p, a, s);
3588     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3589 }
3590 
3591 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3592                                    int scale, float_status *s)
3593 {
3594     FloatParts64 p;
3595 
3596     bfloat16_unpack_canonical(&p, a, s);
3597     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3598 }
3599 
3600 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3601                                           int scale, float_status *s)
3602 {
3603     FloatParts128 p;
3604 
3605     float128_unpack_canonical(&p, a, s);
3606     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3607 }
3608 
3609 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3610                                           int scale, float_status *s)
3611 {
3612     FloatParts128 p;
3613 
3614     float128_unpack_canonical(&p, a, s);
3615     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3616 }
3617 
3618 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3619                                          int scale, float_status *s)
3620 {
3621     int flags = 0;
3622     Int128 r;
3623     FloatParts128 p;
3624 
3625     float128_unpack_canonical(&p, a, s);
3626 
3627     switch (p.cls) {
3628     case float_class_snan:
3629         flags |= float_flag_invalid_snan;
3630         /* fall through */
3631     case float_class_qnan:
3632         flags |= float_flag_invalid;
3633         r = UINT128_MAX;
3634         break;
3635 
3636     case float_class_inf:
3637         flags = float_flag_invalid | float_flag_invalid_cvti;
3638         r = p.sign ? int128_zero() : UINT128_MAX;
3639         break;
3640 
3641     case float_class_zero:
3642         return int128_zero();
3643 
3644     case float_class_normal:
3645         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3646             flags = float_flag_inexact;
3647             if (p.cls == float_class_zero) {
3648                 r = int128_zero();
3649                 break;
3650             }
3651         }
3652 
3653         if (p.sign) {
3654             flags = float_flag_invalid | float_flag_invalid_cvti;
3655             r = int128_zero();
3656         } else if (p.exp <= 127) {
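            /*
             * The fraction is left-justified with the implicit bit at
             * position 127, so shifting right by 127 - exp leaves only
             * the integer part.
             */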
3657             int shift = 127 - p.exp;
3658             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3659         } else {
3660             flags = float_flag_invalid | float_flag_invalid_cvti;
3661             r = UINT128_MAX;
3662         }
3663         break;
3664 
3665     default:
3666         g_assert_not_reached();
3667     }
3668 
3669     float_raise(flags, s);
3670     return r;
3671 }
3672 
3673 uint8_t float16_to_uint8(float16 a, float_status *s)
3674 {
3675     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3676 }
3677 
3678 uint16_t float16_to_uint16(float16 a, float_status *s)
3679 {
3680     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3681 }
3682 
3683 uint32_t float16_to_uint32(float16 a, float_status *s)
3684 {
3685     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3686 }
3687 
3688 uint64_t float16_to_uint64(float16 a, float_status *s)
3689 {
3690     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3691 }
3692 
3693 uint16_t float32_to_uint16(float32 a, float_status *s)
3694 {
3695     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3696 }
3697 
3698 uint32_t float32_to_uint32(float32 a, float_status *s)
3699 {
3700     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3701 }
3702 
3703 uint64_t float32_to_uint64(float32 a, float_status *s)
3704 {
3705     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3706 }
3707 
3708 uint16_t float64_to_uint16(float64 a, float_status *s)
3709 {
3710     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3711 }
3712 
3713 uint32_t float64_to_uint32(float64 a, float_status *s)
3714 {
3715     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3716 }
3717 
3718 uint64_t float64_to_uint64(float64 a, float_status *s)
3719 {
3720     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3721 }
3722 
3723 uint32_t float128_to_uint32(float128 a, float_status *s)
3724 {
3725     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3726 }
3727 
3728 uint64_t float128_to_uint64(float128 a, float_status *s)
3729 {
3730     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3731 }
3732 
3733 Int128 float128_to_uint128(float128 a, float_status *s)
3734 {
3735     return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3736 }
3737 
3738 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3739 {
3740     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3741 }
3742 
3743 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3744 {
3745     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3746 }
3747 
3748 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3749 {
3750     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3751 }
3752 
3753 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3754 {
3755     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3756 }
3757 
3758 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3759 {
3760     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3761 }
3762 
3763 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3764 {
3765     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3766 }
3767 
3768 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3769 {
3770     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3771 }
3772 
3773 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3774 {
3775     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3776 }
3777 
3778 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3779 {
3780     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3781 }
3782 
3783 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3784 {
3785     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3786 }
3787 
3788 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3789 {
3790     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3791 }
3792 
3793 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3794 {
3795     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3796 }
3797 
3798 uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
3799 {
3800     return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3801 }
3802 
3803 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3804 {
3805     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3806 }
3807 
3808 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3809 {
3810     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3811 }
3812 
3813 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3814 {
3815     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3816 }
3817 
3818 uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
3819 {
3820     return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
3821 }
3822 
3823 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3824 {
3825     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3826 }
3827 
3828 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3829 {
3830     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3831 }
3832 
3833 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3834 {
3835     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3836 }
3837 
3838 /*
3839  * Signed integer to floating-point conversions
3840  */
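/*
 * Conversions from int32_t and narrower inputs are always exact for
 * float64 and wider results; int64_t inputs can require rounding when
 * the destination significand is narrower than 64 bits.
 */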
3841 
3842 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3843 {
3844     FloatParts64 p;
3845 
3846     parts_sint_to_float(&p, a, scale, status);
3847     return float16_round_pack_canonical(&p, status);
3848 }
3849 
3850 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3851 {
3852     return int64_to_float16_scalbn(a, scale, status);
3853 }
3854 
3855 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3856 {
3857     return int64_to_float16_scalbn(a, scale, status);
3858 }
3859 
3860 float16 int64_to_float16(int64_t a, float_status *status)
3861 {
3862     return int64_to_float16_scalbn(a, 0, status);
3863 }
3864 
3865 float16 int32_to_float16(int32_t a, float_status *status)
3866 {
3867     return int64_to_float16_scalbn(a, 0, status);
3868 }
3869 
3870 float16 int16_to_float16(int16_t a, float_status *status)
3871 {
3872     return int64_to_float16_scalbn(a, 0, status);
3873 }
3874 
3875 float16 int8_to_float16(int8_t a, float_status *status)
3876 {
3877     return int64_to_float16_scalbn(a, 0, status);
3878 }
3879 
3880 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3881 {
3882     FloatParts64 p;
3883 
3884     /* Without scaling, there are no overflow concerns. */
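    /*
     * can_use_fpu() additionally guarantees round-to-nearest-even with
     * the inexact flag already set, so deferring to the host FPU here
     * cannot lose any softfloat state.
     */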
3885     if (likely(scale == 0) && can_use_fpu(status)) {
3886         union_float32 ur;
3887         ur.h = a;
3888         return ur.s;
3889     }
3890 
3891     parts_sint_to_float(&p, a, scale, status);
3892     return float32_round_pack_canonical(&p, status);
3893 }
3894 
3895 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3896 {
3897     return int64_to_float32_scalbn(a, scale, status);
3898 }
3899 
3900 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3901 {
3902     return int64_to_float32_scalbn(a, scale, status);
3903 }
3904 
3905 float32 int64_to_float32(int64_t a, float_status *status)
3906 {
3907     return int64_to_float32_scalbn(a, 0, status);
3908 }
3909 
3910 float32 int32_to_float32(int32_t a, float_status *status)
3911 {
3912     return int64_to_float32_scalbn(a, 0, status);
3913 }
3914 
3915 float32 int16_to_float32(int16_t a, float_status *status)
3916 {
3917     return int64_to_float32_scalbn(a, 0, status);
3918 }
3919 
3920 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3921 {
3922     FloatParts64 p;
3923 
3924     /* Without scaling, there are no overflow concerns. */
3925     if (likely(scale == 0) && can_use_fpu(status)) {
3926         union_float64 ur;
3927         ur.h = a;
3928         return ur.s;
3929     }
3930 
3931     parts_sint_to_float(&p, a, scale, status);
3932     return float64_round_pack_canonical(&p, status);
3933 }
3934 
3935 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3936 {
3937     return int64_to_float64_scalbn(a, scale, status);
3938 }
3939 
3940 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3941 {
3942     return int64_to_float64_scalbn(a, scale, status);
3943 }
3944 
3945 float64 int64_to_float64(int64_t a, float_status *status)
3946 {
3947     return int64_to_float64_scalbn(a, 0, status);
3948 }
3949 
3950 float64 int32_to_float64(int32_t a, float_status *status)
3951 {
3952     return int64_to_float64_scalbn(a, 0, status);
3953 }
3954 
3955 float64 int16_to_float64(int16_t a, float_status *status)
3956 {
3957     return int64_to_float64_scalbn(a, 0, status);
3958 }
3959 
3960 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3961 {
3962     FloatParts64 p;
3963 
3964     parts_sint_to_float(&p, a, scale, status);
3965     return bfloat16_round_pack_canonical(&p, status);
3966 }
3967 
3968 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3969 {
3970     return int64_to_bfloat16_scalbn(a, scale, status);
3971 }
3972 
3973 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3974 {
3975     return int64_to_bfloat16_scalbn(a, scale, status);
3976 }
3977 
3978 bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
3979 {
3980     return int64_to_bfloat16_scalbn(a, scale, status);
3981 }
3982 
3983 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3984 {
3985     return int64_to_bfloat16_scalbn(a, 0, status);
3986 }
3987 
3988 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3989 {
3990     return int64_to_bfloat16_scalbn(a, 0, status);
3991 }
3992 
3993 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3994 {
3995     return int64_to_bfloat16_scalbn(a, 0, status);
3996 }
3997 
3998 bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
3999 {
4000     return int64_to_bfloat16_scalbn(a, 0, status);
4001 }
4002 
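/*
 * Normalize the absolute value so that its most significant set bit
 * becomes the implicit bit at position 127; the unbiased exponent is
 * then 127 minus the shift applied.
 */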
4003 float128 int128_to_float128(Int128 a, float_status *status)
4004 {
4005     FloatParts128 p = { };
4006     int shift;
4007 
4008     if (int128_nz(a)) {
4009         p.cls = float_class_normal;
4010         if (!int128_nonneg(a)) {
4011             p.sign = true;
4012             a = int128_neg(a);
4013         }
4014 
4015         shift = clz64(int128_gethi(a));
4016         if (shift == 64) {
4017             shift += clz64(int128_getlo(a));
4018         }
4019 
4020         p.exp = 127 - shift;
4021         a = int128_lshift(a, shift);
4022 
4023         p.frac_hi = int128_gethi(a);
4024         p.frac_lo = int128_getlo(a);
4025     } else {
4026         p.cls = float_class_zero;
4027     }
4028 
4029     return float128_round_pack_canonical(&p, status);
4030 }
4031 
4032 float128 int64_to_float128(int64_t a, float_status *status)
4033 {
4034     FloatParts128 p;
4035 
4036     parts_sint_to_float(&p, a, 0, status);
4037     return float128_round_pack_canonical(&p, status);
4038 }
4039 
4040 float128 int32_to_float128(int32_t a, float_status *status)
4041 {
4042     return int64_to_float128(a, status);
4043 }
4044 
4045 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4046 {
4047     FloatParts128 p;
4048 
4049     parts_sint_to_float(&p, a, 0, status);
4050     return floatx80_round_pack_canonical(&p, status);
4051 }
4052 
4053 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4054 {
4055     return int64_to_floatx80(a, status);
4056 }
4057 
4058 /*
4059  * Unsigned integer to floating-point conversions
4060  */
4061 
4062 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
4063 {
4064     FloatParts64 p;
4065 
4066     parts_uint_to_float(&p, a, scale, status);
4067     return float16_round_pack_canonical(&p, status);
4068 }
4069 
4070 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
4071 {
4072     return uint64_to_float16_scalbn(a, scale, status);
4073 }
4074 
4075 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
4076 {
4077     return uint64_to_float16_scalbn(a, scale, status);
4078 }
4079 
4080 float16 uint64_to_float16(uint64_t a, float_status *status)
4081 {
4082     return uint64_to_float16_scalbn(a, 0, status);
4083 }
4084 
4085 float16 uint32_to_float16(uint32_t a, float_status *status)
4086 {
4087     return uint64_to_float16_scalbn(a, 0, status);
4088 }
4089 
4090 float16 uint16_to_float16(uint16_t a, float_status *status)
4091 {
4092     return uint64_to_float16_scalbn(a, 0, status);
4093 }
4094 
4095 float16 uint8_to_float16(uint8_t a, float_status *status)
4096 {
4097     return uint64_to_float16_scalbn(a, 0, status);
4098 }
4099 
4100 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4101 {
4102     FloatParts64 p;
4103 
4104     /* Without scaling, there are no overflow concerns. */
4105     if (likely(scale == 0) && can_use_fpu(status)) {
4106         union_float32 ur;
4107         ur.h = a;
4108         return ur.s;
4109     }
4110 
4111     parts_uint_to_float(&p, a, scale, status);
4112     return float32_round_pack_canonical(&p, status);
4113 }
4114 
4115 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4116 {
4117     return uint64_to_float32_scalbn(a, scale, status);
4118 }
4119 
4120 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4121 {
4122     return uint64_to_float32_scalbn(a, scale, status);
4123 }
4124 
4125 float32 uint64_to_float32(uint64_t a, float_status *status)
4126 {
4127     return uint64_to_float32_scalbn(a, 0, status);
4128 }
4129 
4130 float32 uint32_to_float32(uint32_t a, float_status *status)
4131 {
4132     return uint64_to_float32_scalbn(a, 0, status);
4133 }
4134 
4135 float32 uint16_to_float32(uint16_t a, float_status *status)
4136 {
4137     return uint64_to_float32_scalbn(a, 0, status);
4138 }
4139 
4140 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4141 {
4142     FloatParts64 p;
4143 
4144     /* Without scaling, there are no overflow concerns. */
4145     if (likely(scale == 0) && can_use_fpu(status)) {
4146         union_float64 ur;
4147         ur.h = a;
4148         return ur.s;
4149     }
4150 
4151     parts_uint_to_float(&p, a, scale, status);
4152     return float64_round_pack_canonical(&p, status);
4153 }
4154 
4155 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4156 {
4157     return uint64_to_float64_scalbn(a, scale, status);
4158 }
4159 
4160 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4161 {
4162     return uint64_to_float64_scalbn(a, scale, status);
4163 }
4164 
4165 float64 uint64_to_float64(uint64_t a, float_status *status)
4166 {
4167     return uint64_to_float64_scalbn(a, 0, status);
4168 }
4169 
4170 float64 uint32_to_float64(uint32_t a, float_status *status)
4171 {
4172     return uint64_to_float64_scalbn(a, 0, status);
4173 }
4174 
4175 float64 uint16_to_float64(uint16_t a, float_status *status)
4176 {
4177     return uint64_to_float64_scalbn(a, 0, status);
4178 }
4179 
4180 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4181 {
4182     FloatParts64 p;
4183 
4184     parts_uint_to_float(&p, a, scale, status);
4185     return bfloat16_round_pack_canonical(&p, status);
4186 }
4187 
4188 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4189 {
4190     return uint64_to_bfloat16_scalbn(a, scale, status);
4191 }
4192 
4193 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4194 {
4195     return uint64_to_bfloat16_scalbn(a, scale, status);
4196 }
4197 
4198 bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
4199 {
4200     return uint64_to_bfloat16_scalbn(a, scale, status);
4201 }
4202 
4203 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4204 {
4205     return uint64_to_bfloat16_scalbn(a, 0, status);
4206 }
4207 
4208 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4209 {
4210     return uint64_to_bfloat16_scalbn(a, 0, status);
4211 }
4212 
4213 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4214 {
4215     return uint64_to_bfloat16_scalbn(a, 0, status);
4216 }
4217 
4218 bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
4219 {
4220     return uint64_to_bfloat16_scalbn(a, 0, status);
4221 }
4222 
4223 float128 uint64_to_float128(uint64_t a, float_status *status)
4224 {
4225     FloatParts128 p;
4226 
4227     parts_uint_to_float(&p, a, 0, status);
4228     return float128_round_pack_canonical(&p, status);
4229 }
4230 
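/* Identical to int128_to_float128() above, minus the sign/negate step. */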
4231 float128 uint128_to_float128(Int128 a, float_status *status)
4232 {
4233     FloatParts128 p = { };
4234     int shift;
4235 
4236     if (int128_nz(a)) {
4237         p.cls = float_class_normal;
4238 
4239         shift = clz64(int128_gethi(a));
4240         if (shift == 64) {
4241             shift += clz64(int128_getlo(a));
4242         }
4243 
4244         p.exp = 127 - shift;
4245         a = int128_lshift(a, shift);
4246 
4247         p.frac_hi = int128_gethi(a);
4248         p.frac_lo = int128_getlo(a);
4249     } else {
4250         p.cls = float_class_zero;
4251     }
4252 
4253     return float128_round_pack_canonical(&p, status);
4254 }
4255 
4256 /*
4257  * Minimum and maximum
4258  */
4259 
4260 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4261 {
4262     FloatParts64 pa, pb, *pr;
4263 
4264     float16_unpack_canonical(&pa, a, s);
4265     float16_unpack_canonical(&pb, b, s);
4266     pr = parts_minmax(&pa, &pb, s, flags);
4267 
4268     return float16_round_pack_canonical(pr, s);
4269 }
4270 
4271 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4272                                 float_status *s, int flags)
4273 {
4274     FloatParts64 pa, pb, *pr;
4275 
4276     bfloat16_unpack_canonical(&pa, a, s);
4277     bfloat16_unpack_canonical(&pb, b, s);
4278     pr = parts_minmax(&pa, &pb, s, flags);
4279 
4280     return bfloat16_round_pack_canonical(pr, s);
4281 }
4282 
4283 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4284 {
4285     FloatParts64 pa, pb, *pr;
4286 
4287     float32_unpack_canonical(&pa, a, s);
4288     float32_unpack_canonical(&pb, b, s);
4289     pr = parts_minmax(&pa, &pb, s, flags);
4290 
4291     return float32_round_pack_canonical(pr, s);
4292 }
4293 
4294 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4295 {
4296     FloatParts64 pa, pb, *pr;
4297 
4298     float64_unpack_canonical(&pa, a, s);
4299     float64_unpack_canonical(&pb, b, s);
4300     pr = parts_minmax(&pa, &pb, s, flags);
4301 
4302     return float64_round_pack_canonical(pr, s);
4303 }
4304 
4305 static float128 float128_minmax(float128 a, float128 b,
4306                                 float_status *s, int flags)
4307 {
4308     FloatParts128 pa, pb, *pr;
4309 
4310     float128_unpack_canonical(&pa, a, s);
4311     float128_unpack_canonical(&pb, b, s);
4312     pr = parts_minmax(&pa, &pb, s, flags);
4313 
4314     return float128_round_pack_canonical(pr, s);
4315 }
4316 
4317 #define MINMAX_1(type, name, flags) \
4318     type type##_##name(type a, type b, float_status *s) \
4319     { return type##_minmax(a, b, s, flags); }
4320 
4321 #define MINMAX_2(type) \
4322     MINMAX_1(type, max, 0)                                                \
4323     MINMAX_1(type, maxnum, minmax_isnum)                                  \
4324     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4325     MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4326     MINMAX_1(type, min, minmax_ismin)                                     \
4327     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4328     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4329     MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4330 
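/*
 * MINMAX_2(T) defines T_max, T_maxnum, T_maxnummag, T_maximum_number,
 * T_min, T_minnum, T_minnummag and T_minimum_number.  The minnum/maxnum
 * variants implement IEEE 754-2008 minNum/maxNum, while minimum_number
 * and maximum_number follow the IEEE 754-2019 operations of those names.
 */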
4331 MINMAX_2(float16)
4332 MINMAX_2(bfloat16)
4333 MINMAX_2(float32)
4334 MINMAX_2(float64)
4335 MINMAX_2(float128)
4336 
4337 #undef MINMAX_1
4338 #undef MINMAX_2
4339 
4340 /*
4341  * Floating-point compare
4342  */
4343 
4344 static FloatRelation QEMU_FLATTEN
4345 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4346 {
4347     FloatParts64 pa, pb;
4348 
4349     float16_unpack_canonical(&pa, a, s);
4350     float16_unpack_canonical(&pb, b, s);
4351     return parts_compare(&pa, &pb, s, is_quiet);
4352 }
4353 
4354 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4355 {
4356     return float16_do_compare(a, b, s, false);
4357 }
4358 
4359 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4360 {
4361     return float16_do_compare(a, b, s, true);
4362 }
4363 
4364 static FloatRelation QEMU_SOFTFLOAT_ATTR
4365 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4366 {
4367     FloatParts64 pa, pb;
4368 
4369     float32_unpack_canonical(&pa, a, s);
4370     float32_unpack_canonical(&pb, b, s);
4371     return parts_compare(&pa, &pb, s, is_quiet);
4372 }
4373 
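/*
 * Hardfloat fast path: the ISO C comparison macros below never trap on
 * quiet NaNs, and the remaining unordered case drops back to the
 * softfloat path so the status flags are still set correctly.
 */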
4374 static FloatRelation QEMU_FLATTEN
4375 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4376 {
4377     union_float32 ua, ub;
4378 
4379     ua.s = xa;
4380     ub.s = xb;
4381 
4382     if (QEMU_NO_HARDFLOAT) {
4383         goto soft;
4384     }
4385 
4386     float32_input_flush2(&ua.s, &ub.s, s);
4387     if (isgreaterequal(ua.h, ub.h)) {
4388         if (isgreater(ua.h, ub.h)) {
4389             return float_relation_greater;
4390         }
4391         return float_relation_equal;
4392     }
4393     if (likely(isless(ua.h, ub.h))) {
4394         return float_relation_less;
4395     }
4396     /*
4397      * The only condition remaining is unordered.
4398      * Fall through to set flags.
4399      */
4400  soft:
4401     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4402 }
4403 
4404 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4405 {
4406     return float32_hs_compare(a, b, s, false);
4407 }
4408 
4409 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4410 {
4411     return float32_hs_compare(a, b, s, true);
4412 }
4413 
4414 static FloatRelation QEMU_SOFTFLOAT_ATTR
4415 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4416 {
4417     FloatParts64 pa, pb;
4418 
4419     float64_unpack_canonical(&pa, a, s);
4420     float64_unpack_canonical(&pb, b, s);
4421     return parts_compare(&pa, &pb, s, is_quiet);
4422 }
4423 
4424 static FloatRelation QEMU_FLATTEN
4425 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4426 {
4427     union_float64 ua, ub;
4428 
4429     ua.s = xa;
4430     ub.s = xb;
4431 
4432     if (QEMU_NO_HARDFLOAT) {
4433         goto soft;
4434     }
4435 
4436     float64_input_flush2(&ua.s, &ub.s, s);
4437     if (isgreaterequal(ua.h, ub.h)) {
4438         if (isgreater(ua.h, ub.h)) {
4439             return float_relation_greater;
4440         }
4441         return float_relation_equal;
4442     }
4443     if (likely(isless(ua.h, ub.h))) {
4444         return float_relation_less;
4445     }
4446     /*
4447      * The only condition remaining is unordered.
4448      * Fall through to set flags.
4449      */
4450  soft:
4451     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4452 }
4453 
4454 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4455 {
4456     return float64_hs_compare(a, b, s, false);
4457 }
4458 
4459 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4460 {
4461     return float64_hs_compare(a, b, s, true);
4462 }
4463 
4464 static FloatRelation QEMU_FLATTEN
4465 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4466 {
4467     FloatParts64 pa, pb;
4468 
4469     bfloat16_unpack_canonical(&pa, a, s);
4470     bfloat16_unpack_canonical(&pb, b, s);
4471     return parts_compare(&pa, &pb, s, is_quiet);
4472 }
4473 
4474 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4475 {
4476     return bfloat16_do_compare(a, b, s, false);
4477 }
4478 
4479 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4480 {
4481     return bfloat16_do_compare(a, b, s, true);
4482 }
4483 
4484 static FloatRelation QEMU_FLATTEN
4485 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4486 {
4487     FloatParts128 pa, pb;
4488 
4489     float128_unpack_canonical(&pa, a, s);
4490     float128_unpack_canonical(&pb, b, s);
4491     return parts_compare(&pa, &pb, s, is_quiet);
4492 }
4493 
4494 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4495 {
4496     return float128_do_compare(a, b, s, false);
4497 }
4498 
4499 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4500 {
4501     return float128_do_compare(a, b, s, true);
4502 }
4503 
4504 static FloatRelation QEMU_FLATTEN
4505 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4506 {
4507     FloatParts128 pa, pb;
4508 
4509     if (!floatx80_unpack_canonical(&pa, a, s) ||
4510         !floatx80_unpack_canonical(&pb, b, s)) {
4511         return float_relation_unordered;
4512     }
4513     return parts_compare(&pa, &pb, s, is_quiet);
4514 }
4515 
4516 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4517 {
4518     return floatx80_do_compare(a, b, s, false);
4519 }
4520 
4521 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4522 {
4523     return floatx80_do_compare(a, b, s, true);
4524 }
4525 
4526 /*
4527  * Scale by 2**N
4528  */
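/*
 * For example (illustrative), scaling the float64 value 2.0 by n = 3
 * yields 16.0; overflow and underflow are flagged just as for any
 * other rounded result.
 */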
4529 
4530 float16 float16_scalbn(float16 a, int n, float_status *status)
4531 {
4532     FloatParts64 p;
4533 
4534     float16_unpack_canonical(&p, a, status);
4535     parts_scalbn(&p, n, status);
4536     return float16_round_pack_canonical(&p, status);
4537 }
4538 
4539 float32 float32_scalbn(float32 a, int n, float_status *status)
4540 {
4541     FloatParts64 p;
4542 
4543     float32_unpack_canonical(&p, a, status);
4544     parts_scalbn(&p, n, status);
4545     return float32_round_pack_canonical(&p, status);
4546 }
4547 
4548 float64 float64_scalbn(float64 a, int n, float_status *status)
4549 {
4550     FloatParts64 p;
4551 
4552     float64_unpack_canonical(&p, a, status);
4553     parts_scalbn(&p, n, status);
4554     return float64_round_pack_canonical(&p, status);
4555 }
4556 
4557 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4558 {
4559     FloatParts64 p;
4560 
4561     bfloat16_unpack_canonical(&p, a, status);
4562     parts_scalbn(&p, n, status);
4563     return bfloat16_round_pack_canonical(&p, status);
4564 }
4565 
4566 float128 float128_scalbn(float128 a, int n, float_status *status)
4567 {
4568     FloatParts128 p;
4569 
4570     float128_unpack_canonical(&p, a, status);
4571     parts_scalbn(&p, n, status);
4572     return float128_round_pack_canonical(&p, status);
4573 }
4574 
4575 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4576 {
4577     FloatParts128 p;
4578 
4579     if (!floatx80_unpack_canonical(&p, a, status)) {
4580         return floatx80_default_nan(status);
4581     }
4582     parts_scalbn(&p, n, status);
4583     return floatx80_round_pack_canonical(&p, status);
4584 }
4585 
4586 /*
4587  * Square Root
4588  */
4589 
4590 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4591 {
4592     FloatParts64 p;
4593 
4594     float16_unpack_canonical(&p, a, status);
4595     parts_sqrt(&p, status, &float16_params);
4596     return float16_round_pack_canonical(&p, status);
4597 }
4598 
4599 static float32 QEMU_SOFTFLOAT_ATTR
4600 soft_f32_sqrt(float32 a, float_status *status)
4601 {
4602     FloatParts64 p;
4603 
4604     float32_unpack_canonical(&p, a, status);
4605     parts_sqrt(&p, status, &float32_params);
4606     return float32_round_pack_canonical(&p, status);
4607 }
4608 
4609 static float64 QEMU_SOFTFLOAT_ATTR
4610 soft_f64_sqrt(float64 a, float_status *status)
4611 {
4612     FloatParts64 p;
4613 
4614     float64_unpack_canonical(&p, a, status);
4615     parts_sqrt(&p, status, &float64_params);
4616     return float64_round_pack_canonical(&p, status);
4617 }
4618 
4619 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
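/*
 * Hardfloat fast path: IEEE 754 requires a correctly rounded sqrt, so
 * on a conforming host sqrtf() of a non-negative zero or normal input
 * matches the softfloat result exactly; all other inputs take the soft
 * path for the sake of flag handling.
 */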
4620 {
4621     union_float32 ua, ur;
4622 
4623     ua.s = xa;
4624     if (unlikely(!can_use_fpu(s))) {
4625         goto soft;
4626     }
4627 
4628     float32_input_flush1(&ua.s, s);
4629     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4630         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4631                        fpclassify(ua.h) == FP_ZERO) ||
4632                      signbit(ua.h))) {
4633             goto soft;
4634         }
4635     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4636                         float32_is_neg(ua.s))) {
4637         goto soft;
4638     }
4639     ur.h = sqrtf(ua.h);
4640     return ur.s;
4641 
4642  soft:
4643     return soft_f32_sqrt(ua.s, s);
4644 }
4645 
4646 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4647 {
4648     union_float64 ua, ur;
4649 
4650     ua.s = xa;
4651     if (unlikely(!can_use_fpu(s))) {
4652         goto soft;
4653     }
4654 
4655     float64_input_flush1(&ua.s, s);
4656     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4657         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4658                        fpclassify(ua.h) == FP_ZERO) ||
4659                      signbit(ua.h))) {
4660             goto soft;
4661         }
4662     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4663                         float64_is_neg(ua.s))) {
4664         goto soft;
4665     }
4666     ur.h = sqrt(ua.h);
4667     return ur.s;
4668 
4669  soft:
4670     return soft_f64_sqrt(ua.s, s);
4671 }
4672 
4673 float64 float64r32_sqrt(float64 a, float_status *status)
4674 {
4675     FloatParts64 p;
4676 
4677     float64_unpack_canonical(&p, a, status);
4678     parts_sqrt(&p, status, &float64_params);
4679     return float64r32_round_pack_canonical(&p, status);
4680 }
4681 
4682 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4683 {
4684     FloatParts64 p;
4685 
4686     bfloat16_unpack_canonical(&p, a, status);
4687     parts_sqrt(&p, status, &bfloat16_params);
4688     return bfloat16_round_pack_canonical(&p, status);
4689 }
4690 
4691 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4692 {
4693     FloatParts128 p;
4694 
4695     float128_unpack_canonical(&p, a, status);
4696     parts_sqrt(&p, status, &float128_params);
4697     return float128_round_pack_canonical(&p, status);
4698 }
4699 
4700 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4701 {
4702     FloatParts128 p;
4703 
4704     if (!floatx80_unpack_canonical(&p, a, s)) {
4705         return floatx80_default_nan(s);
4706     }
4707     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4708     return floatx80_round_pack_canonical(&p, s);
4709 }
4710 
4711 /*
4712  * log2
4713  */
4714 float32 float32_log2(float32 a, float_status *status)
4715 {
4716     FloatParts64 p;
4717 
4718     float32_unpack_canonical(&p, a, status);
4719     parts_log2(&p, status, &float32_params);
4720     return float32_round_pack_canonical(&p, status);
4721 }
4722 
4723 float64 float64_log2(float64 a, float_status *status)
4724 {
4725     FloatParts64 p;
4726 
4727     float64_unpack_canonical(&p, a, status);
4728     parts_log2(&p, status, &float64_params);
4729     return float64_round_pack_canonical(&p, status);
4730 }
4731 
4732 /*----------------------------------------------------------------------------
4733 | The pattern for a default generated NaN.
4734 *----------------------------------------------------------------------------*/
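/*
 * parts_default_nan() produces the target's preferred pattern with the
 * fraction left-justified in canonical position; the shift right by
 * frac_shift restores storage layout before repacking.
 */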
4735 
4736 float16 float16_default_nan(float_status *status)
4737 {
4738     FloatParts64 p;
4739 
4740     parts_default_nan(&p, status);
4741     p.frac >>= float16_params.frac_shift;
4742     return float16_pack_raw(&p);
4743 }
4744 
4745 float32 float32_default_nan(float_status *status)
4746 {
4747     FloatParts64 p;
4748 
4749     parts_default_nan(&p, status);
4750     p.frac >>= float32_params.frac_shift;
4751     return float32_pack_raw(&p);
4752 }
4753 
4754 float64 float64_default_nan(float_status *status)
4755 {
4756     FloatParts64 p;
4757 
4758     parts_default_nan(&p, status);
4759     p.frac >>= float64_params.frac_shift;
4760     return float64_pack_raw(&p);
4761 }
4762 
4763 float128 float128_default_nan(float_status *status)
4764 {
4765     FloatParts128 p;
4766 
4767     parts_default_nan(&p, status);
4768     frac_shr(&p, float128_params.frac_shift);
4769     return float128_pack_raw(&p);
4770 }
4771 
4772 bfloat16 bfloat16_default_nan(float_status *status)
4773 {
4774     FloatParts64 p;
4775 
4776     parts_default_nan(&p, status);
4777     p.frac >>= bfloat16_params.frac_shift;
4778     return bfloat16_pack_raw(&p);
4779 }
4780 
4781 /*----------------------------------------------------------------------------
4782 | Returns a quiet NaN from a signalling NaN for the floating-point value `a'.
4783 *----------------------------------------------------------------------------*/
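/*
 * The fraction is shifted up to canonical position so that
 * parts_silence_nan() can apply the target-specific quieting rule,
 * then shifted back down before repacking.
 */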
4784 
4785 float16 float16_silence_nan(float16 a, float_status *status)
4786 {
4787     FloatParts64 p;
4788 
4789     float16_unpack_raw(&p, a);
4790     p.frac <<= float16_params.frac_shift;
4791     parts_silence_nan(&p, status);
4792     p.frac >>= float16_params.frac_shift;
4793     return float16_pack_raw(&p);
4794 }
4795 
4796 float32 float32_silence_nan(float32 a, float_status *status)
4797 {
4798     FloatParts64 p;
4799 
4800     float32_unpack_raw(&p, a);
4801     p.frac <<= float32_params.frac_shift;
4802     parts_silence_nan(&p, status);
4803     p.frac >>= float32_params.frac_shift;
4804     return float32_pack_raw(&p);
4805 }
4806 
4807 float64 float64_silence_nan(float64 a, float_status *status)
4808 {
4809     FloatParts64 p;
4810 
4811     float64_unpack_raw(&p, a);
4812     p.frac <<= float64_params.frac_shift;
4813     parts_silence_nan(&p, status);
4814     p.frac >>= float64_params.frac_shift;
4815     return float64_pack_raw(&p);
4816 }
4817 
4818 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4819 {
4820     FloatParts64 p;
4821 
4822     bfloat16_unpack_raw(&p, a);
4823     p.frac <<= bfloat16_params.frac_shift;
4824     parts_silence_nan(&p, status);
4825     p.frac >>= bfloat16_params.frac_shift;
4826     return bfloat16_pack_raw(&p);
4827 }
4828 
4829 float128 float128_silence_nan(float128 a, float_status *status)
4830 {
4831     FloatParts128 p;
4832 
4833     float128_unpack_raw(&p, a);
4834     frac_shl(&p, float128_params.frac_shift);
4835     parts_silence_nan(&p, status);
4836     frac_shr(&p, float128_params.frac_shift);
4837     return float128_pack_raw(&p);
4838 }
4839 
4840 /*----------------------------------------------------------------------------
4841 | If `a' is denormal and we are in flush-to-zero mode then set the
4842 | input-denormal exception and return zero. Otherwise just return the value.
4843 *----------------------------------------------------------------------------*/
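/*
 * For example (illustrative), with flush_inputs_to_zero set, the
 * smallest positive float32 denormal (bit pattern 0x00000001) squashes
 * to +0 and raises the input-denormal exception.
 */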
4844 
4845 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4846 {
4847     if (p.exp == 0 && p.frac != 0) {
4848         float_raise(float_flag_input_denormal, status);
4849         return true;
4850     }
4851 
4852     return false;
4853 }
4854 
4855 float16 float16_squash_input_denormal(float16 a, float_status *status)
4856 {
4857     if (status->flush_inputs_to_zero) {
4858         FloatParts64 p;
4859 
4860         float16_unpack_raw(&p, a);
4861         if (parts_squash_denormal(p, status)) {
4862             return float16_set_sign(float16_zero, p.sign);
4863         }
4864     }
4865     return a;
4866 }
4867 
4868 float32 float32_squash_input_denormal(float32 a, float_status *status)
4869 {
4870     if (status->flush_inputs_to_zero) {
4871         FloatParts64 p;
4872 
4873         float32_unpack_raw(&p, a);
4874         if (parts_squash_denormal(p, status)) {
4875             return float32_set_sign(float32_zero, p.sign);
4876         }
4877     }
4878     return a;
4879 }
4880 
4881 float64 float64_squash_input_denormal(float64 a, float_status *status)
4882 {
4883     if (status->flush_inputs_to_zero) {
4884         FloatParts64 p;
4885 
4886         float64_unpack_raw(&p, a);
4887         if (parts_squash_denormal(p, status)) {
4888             return float64_set_sign(float64_zero, p.sign);
4889         }
4890     }
4891     return a;
4892 }
4893 
4894 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4895 {
4896     if (status->flush_inputs_to_zero) {
4897         FloatParts64 p;
4898 
4899         bfloat16_unpack_raw(&p, a);
4900         if (parts_squash_denormal(p, status)) {
4901             return bfloat16_set_sign(bfloat16_zero, p.sign);
4902         }
4903     }
4904     return a;
4905 }
4906 
4907 /*----------------------------------------------------------------------------
4908 | Normalizes the subnormal extended double-precision floating-point value
4909 | represented by the denormalized significand `aSig'.  The normalized exponent
4910 | and significand are stored at the locations pointed to by `zExpPtr' and
4911 | `zSigPtr', respectively.
4912 *----------------------------------------------------------------------------*/
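/*
 * For example (illustrative), aSig = 1 gives shiftCount = 63, so
 * *zSigPtr = 0x8000000000000000 and *zExpPtr = 1 - 63 = -62.
 */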
4913 
4914 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4915                                 uint64_t *zSigPtr)
4916 {
4917     int8_t shiftCount;
4918 
4919     shiftCount = clz64(aSig);
4920     *zSigPtr = aSig<<shiftCount;
4921     *zExpPtr = 1 - shiftCount;
4922 }
4923 
4924 /*----------------------------------------------------------------------------
4925 | Takes two extended double-precision floating-point values `a' and `b', one
4926 | of which is a NaN, and returns the appropriate NaN result.  If either `a' or
4927 | `b' is a signaling NaN, the invalid exception is raised.
4928 *----------------------------------------------------------------------------*/
4929 
4930 floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status)
4931 {
4932     FloatParts128 pa, pb, *pr;
4933 
4934     if (!floatx80_unpack_canonical(&pa, a, status) ||
4935         !floatx80_unpack_canonical(&pb, b, status)) {
4936         return floatx80_default_nan(status);
4937     }
4938 
4939     pr = parts_pick_nan(&pa, &pb, status);
4940     return floatx80_round_pack_canonical(pr, status);
4941 }
4942 
4943 /*----------------------------------------------------------------------------
4944 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4945 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4946 | and returns the proper extended double-precision floating-point value
4947 | corresponding to the abstract input.  Ordinarily, the abstract value is
4948 | rounded and packed into the extended double-precision format, with the
4949 | inexact exception raised if the abstract input cannot be represented
4950 | exactly.  However, if the abstract value is too large, the overflow and
4951 | inexact exceptions are raised and an infinity or maximal finite value is
4952 | returned.  If the abstract value is too small, the input value is rounded to
4953 | a subnormal number, and the underflow and inexact exceptions are raised if
4954 | the abstract input cannot be represented exactly as a subnormal extended
4955 | double-precision floating-point number.
4956 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4957 | the result is rounded to the same number of bits as single or double
4958 | precision, respectively.  Otherwise, the result is rounded to the full
4959 | precision of the extended double-precision format.
4960 |     The input significand must be normalized or smaller.  If the input
4961 | significand is not normalized, `zExp' must be 0; in that case, the result
4962 | returned is a subnormal number, and it must not require rounding.  The
4963 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4964 | Floating-Point Arithmetic.
4965 *----------------------------------------------------------------------------*/
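/*
 * For floatx80_precision_d the low 11 bits of zSig0 (64 - 53) are
 * round bits, with roundIncrement = 0x400 equal to half their span;
 * for floatx80_precision_s the low 40 bits (64 - 24) are round bits
 * with roundIncrement = 2**39.
 */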
4966 
4967 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4968                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4969                               float_status *status)
4970 {
4971     FloatRoundMode roundingMode;
4972     bool roundNearestEven, increment, isTiny;
4973     int64_t roundIncrement, roundMask, roundBits;
4974 
4975     roundingMode = status->float_rounding_mode;
4976     roundNearestEven = ( roundingMode == float_round_nearest_even );
4977     switch (roundingPrecision) {
4978     case floatx80_precision_x:
4979         goto precision80;
4980     case floatx80_precision_d:
4981         roundIncrement = UINT64_C(0x0000000000000400);
4982         roundMask = UINT64_C(0x00000000000007FF);
4983         break;
4984     case floatx80_precision_s:
4985         roundIncrement = UINT64_C(0x0000008000000000);
4986         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4987         break;
4988     default:
4989         g_assert_not_reached();
4990     }
4991     zSig0 |= ( zSig1 != 0 );
4992     switch (roundingMode) {
4993     case float_round_nearest_even:
4994     case float_round_ties_away:
4995         break;
4996     case float_round_to_zero:
4997         roundIncrement = 0;
4998         break;
4999     case float_round_up:
5000         roundIncrement = zSign ? 0 : roundMask;
5001         break;
5002     case float_round_down:
5003         roundIncrement = zSign ? roundMask : 0;
5004         break;
5005     default:
5006         abort();
5007     }
5008     roundBits = zSig0 & roundMask;
5009     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
5010         if (    ( 0x7FFE < zExp )
5011              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
5012            ) {
5013             goto overflow;
5014         }
5015         if ( zExp <= 0 ) {
5016             if (status->flush_to_zero) {
5017                 float_raise(float_flag_output_denormal, status);
5018                 return packFloatx80(zSign, 0, 0);
5019             }
5020             isTiny = status->tininess_before_rounding
5021                   || (zExp < 0)
5022                   || (zSig0 <= zSig0 + roundIncrement);
5023             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
5024             zExp = 0;
5025             roundBits = zSig0 & roundMask;
5026             if (isTiny && roundBits) {
5027                 float_raise(float_flag_underflow, status);
5028             }
5029             if (roundBits) {
5030                 float_raise(float_flag_inexact, status);
5031             }
5032             zSig0 += roundIncrement;
5033             if ( (int64_t) zSig0 < 0 ) zExp = 1;
5034             roundIncrement = roundMask + 1;
5035             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5036                 roundMask |= roundIncrement;
5037             }
5038             zSig0 &= ~ roundMask;
5039             return packFloatx80( zSign, zExp, zSig0 );
5040         }
5041     }
5042     if (roundBits) {
5043         float_raise(float_flag_inexact, status);
5044     }
5045     zSig0 += roundIncrement;
5046     if ( zSig0 < roundIncrement ) {
5047         ++zExp;
5048         zSig0 = UINT64_C(0x8000000000000000);
5049     }
5050     roundIncrement = roundMask + 1;
5051     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5052         roundMask |= roundIncrement;
5053     }
5054     zSig0 &= ~ roundMask;
5055     if ( zSig0 == 0 ) zExp = 0;
5056     return packFloatx80( zSign, zExp, zSig0 );
5057  precision80:
5058     switch (roundingMode) {
5059     case float_round_nearest_even:
5060     case float_round_ties_away:
5061         increment = ((int64_t)zSig1 < 0);
5062         break;
5063     case float_round_to_zero:
5064         increment = 0;
5065         break;
5066     case float_round_up:
5067         increment = !zSign && zSig1;
5068         break;
5069     case float_round_down:
5070         increment = zSign && zSig1;
5071         break;
5072     default:
5073         abort();
5074     }
5075     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
5076         if (    ( 0x7FFE < zExp )
5077              || (    ( zExp == 0x7FFE )
5078                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
5079                   && increment
5080                 )
5081            ) {
5082             roundMask = 0;
5083  overflow:
5084             float_raise(float_flag_overflow | float_flag_inexact, status);
5085             if (    ( roundingMode == float_round_to_zero )
5086                  || ( zSign && ( roundingMode == float_round_up ) )
5087                  || ( ! zSign && ( roundingMode == float_round_down ) )
5088                ) {
5089                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
5090             }
5091             return packFloatx80(zSign,
5092                                 floatx80_infinity_high,
5093                                 floatx80_infinity_low);
5094         }
5095         if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}

/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input.  This routine is just like
| `roundAndPackFloatx80' except that the input significand does not have to be
| normalized.
*----------------------------------------------------------------------------*/

floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
                                       bool zSign, int32_t zExp,
                                       uint64_t zSig0, uint64_t zSig1,
                                       float_status *status)
{
    int8_t shiftCount;

    if ( zSig0 == 0 ) {
        zSig0 = zSig1;
        zSig1 = 0;
        zExp -= 64;
    }
    shiftCount = clz64(zSig0);
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    zExp -= shiftCount;
    return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
                                zSig0, zSig1, status);

}

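/*----------------------------------------------------------------------------
| Worked example (illustrative only): with zSig0 == 0 and
| zSig1 == UINT64_C(3), the first branch moves zSig1 into zSig0 and lowers
| zExp by 64; clz64() then reports 62 leading zeros, so the significand is
| shifted left to 0xC000000000000000 and zExp drops by a further 62.  Each
| step rescales the exponent so the represented value is unchanged before
| rounding.
*----------------------------------------------------------------------------*/
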
/*----------------------------------------------------------------------------
| Returns the binary exponential of the single-precision floating-point value
| `a'. The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
|
| Uses the following identities:
|
| 1. -------------------------------------------------------------------------
|      x    x*ln(2)
|     2  = e
|
| 2. -------------------------------------------------------------------------
|                      2     3     4     5           n
|      x        x     x     x     x     x           x
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
|               1!    2!    3!    4!    5!          n!
*----------------------------------------------------------------------------*/

static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /* 1/1!  */
    const_float64( 0x3fe0000000000000ll ), /* 1/2!  */
    const_float64( 0x3fc5555555555555ll ), /* 1/3!  */
    const_float64( 0x3fa5555555555555ll ), /* 1/4!  */
    const_float64( 0x3f81111111111111ll ), /* 1/5!  */
    const_float64( 0x3f56c16c16c16c17ll ), /* 1/6!  */
    const_float64( 0x3f2a01a01a01a01all ), /* 1/7!  */
    const_float64( 0x3efa01a01a01a01all ), /* 1/8!  */
    const_float64( 0x3ec71de3a556c734ll ), /* 1/9!  */
    const_float64( 0x3e927e4fb7789f5cll ), /* 1/10! */
    const_float64( 0x3e5ae64567f544e4ll ), /* 1/11! */
    const_float64( 0x3e21eed8eff8d898ll ), /* 1/12! */
    const_float64( 0x3de6124613a86d09ll ), /* 1/13! */
    const_float64( 0x3da93974a8c07c9dll ), /* 1/14! */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 1/15! */
};
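
/*
 * The table holds 1/n! for n = 1..15 as raw IEEE-754 double bit patterns.
 * A hypothetical way to regenerate it (a sketch, not part of QEMU; exact
 * agreement with the later entries assumes the iterated divisions stay
 * correctly rounded):
 *
 *     #include <inttypes.h>
 *     #include <stdio.h>
 *     #include <string.h>
 *
 *     int main(void)
 *     {
 *         double c = 1.0;
 *         for (int n = 1; n <= 15; n++) {
 *             uint64_t bits;
 *             c /= n;                            // now c ~= 1/n!
 *             memcpy(&bits, &c, sizeof(bits));   // type-pun via memcpy
 *             printf("0x%016" PRIx64 "\n", bits);
 *         }
 *         return 0;
 *     }
 */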

float32 float32_exp2(float32 a, float_status *status)
{
    FloatParts64 xp, xnp, tp, rp;
    int i;

    float32_unpack_canonical(&xp, a, status);
    if (unlikely(xp.cls != float_class_normal)) {
        switch (xp.cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(&xp, status);
            return float32_round_pack_canonical(&xp, status);
        case float_class_inf:
            return xp.sign ? float32_zero : a;
        case float_class_zero:
            return float32_one;
        default:
            break;
        }
        g_assert_not_reached();
    }

    float_raise(float_flag_inexact, status);

    float64_unpack_canonical(&tp, float64_ln2, status);
    xp = *parts_mul(&xp, &tp, status);
    xnp = xp;

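    /*
     * Evaluate the Taylor series in double precision: after iteration i,
     * rp holds 1 + sum(x^n / n!) for n = 1..i+1 and xnp holds x^(i+2),
     * where x = a * ln(2), so rp converges to e^(a*ln2) = 2^a.
     */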
    float64_unpack_canonical(&rp, float64_one, status);
    for (i = 0 ; i < 15 ; i++) {
        float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
        rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status);
        xnp = *parts_mul(&xnp, &xp, status);
    }

    return float32_round_pack_canonical(&rp, status);
}
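
/*
 * Illustrative behaviour (not a test in this file): with a default
 * float_status, float32_exp2() maps 0 to exactly 1, -inf to +0, +inf to
 * +inf, and any normal input such as 1.0f to an approximation of 2.0f
 * with float_flag_inexact raised.
 */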

/*----------------------------------------------------------------------------
| Rounds the extended double-precision floating-point value `a'
| to the precision provided by floatx80_rounding_precision and returns the
| result as an extended double-precision floating-point value.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_round(floatx80 a, float_status *status)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }
    return floatx80_round_pack_canonical(&p, status);
}
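
/*
 * Sketch of a call site (assumes a caller-initialized status; not code
 * from this file):
 *
 *     float_status st = { 0 };
 *     st.floatx80_rounding_precision = floatx80_precision_s;
 *     floatx80 r = floatx80_round(x, &st);
 *
 * which rounds x's 64-bit significand to single precision (24 bits),
 * raising inexact/underflow/overflow in st as appropriate.
 */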

static void __attribute__((constructor)) softfloat_init(void)
{
    union_float64 ua, ub, uc, ur;

    if (QEMU_NO_HARDFLOAT) {
        return;
    }
    /*
     * Test that the host's FMA is not obviously broken. For example,
     * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
     *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
     */
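    /*
     * The exact product ua * ub is half an ulp of uc plus a tiny excess,
     * so ua * ub + uc lies just above the round-to-even halfway point:
     * a correct fused multiply-add rounds up to 0x0020000000000001, while
     * a double-rounding (multiply, then add) implementation lands exactly
     * on the halfway point and returns the even value 0x0020000000000000.
     */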
    ua.s = 0x0020000000000001ULL;
    ub.s = 0x3ca0000000000000ULL;
    uc.s = 0x0020000000000000ULL;
    ur.h = fma(ua.h, ub.h, uc.h);
    if (ur.s != 0x0020000000000001ULL) {
        force_soft_fma = true;
    }
}