/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

/*
===============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

/* softfloat (and in particular the code in softfloat-specialize.h) is
 * target-dependent and needs the TARGET_* macros.
 */
#include "qemu/osdep.h"
#include <math.h>
#include "qemu/bitops.h"
#include "fpu/softfloat.h"

/* We only need stdlib for abort(), which qemu/osdep.h already pulls in. */

/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations.  (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
#include "fpu/softfloat-macros.h"

/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, guest and host FP instruction semantics are similar but not
 * identical, particularly when handling NaNs. Second, emulating the guest
 * FP exception flags at reasonable speed is not trivial: reading the host's
 * flags register with a feclearexcept & fetestexcept pair is slow [slightly
 * slower than soft-fp], and trapping on every FP exception is neither fast
 * nor pleasant to work with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is smaller than the minimum normal.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK

#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3

/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif

static inline bool can_use_fpu(const float_status *s)
{
    if (QEMU_NO_HARDFLOAT) {
        return false;
    }
    return likely(s->float_exception_flags & float_flag_inexact &&
                  s->float_rounding_mode == float_round_nearest_even);
}

/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);

/* 2-input is-zero-or-normal */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}

static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}

/* 3-input is-zero-or-normal */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}

static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}

static inline bool f32_is_inf(union_float32 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float32_is_infinity(a.s);
}

static inline bool f64_is_inf(union_float64 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float64_is_infinity(a.s);
}

static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
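
/*
 * Illustrative sketch (not part of this file's build): an add operation
 * could be wired through float32_gen2 roughly as
 *
 *     static float hard_f32_add(float a, float b)
 *     {
 *         return a + b;
 *     }
 *
 *     float32 float32_add(float32 a, float32 b, float_status *s)
 *     {
 *         return float32_gen2(a, b, s, hard_f32_add, soft_f32_add,
 *                             f32_addsubmul_pre, f32_addsubmul_post);
 *     }
 *
 * where soft_f32_add, f32_addsubmul_pre and f32_addsubmul_post are
 * assumed names for the softfloat fallback and the fast-path guards;
 * the real wiring for each operation appears later in this file.
 */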

/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN, so cls >= float_class_qnan tests for any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
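
/*
 * Sketch of the intended use of these masks (the real users live in
 * softfloat-parts.c.inc): classify both operands once, then test whole
 * groups of classes with a single branch:
 *
 *     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 *     if (unlikely(ab_mask & float_cmask_anynan)) {
 *         ... handle NaN propagation ...
 *     }
 */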

/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 8,
};
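
/*
 * The flags combine; e.g. the IEEE 754-2008 minNum() operation would be
 * requested as (minmax_ismin | minmax_isnum), and minNumMag() as
 * (minmax_ismin | minmax_isnum | minmax_ismag).  (Illustrative; the
 * public entry points that build these flag sets appear later.)
 */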

/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}

/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
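
/*
 * Worked example: in FloatParts64, the value 1.5 decomposes to
 *   cls = float_class_normal, sign = false, exp = 0,
 *   frac = 0xC000000000000000
 * i.e. binary 1.1 with the implicit integer bit at
 * DECOMPOSED_BINARY_POINT (bit 63) and the fraction bit just below it.
 */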

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction:
 *   round_mask: bits below the lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 *   m68k_denormal: explicit integer bit for extended precision may be 1
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_re_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    bool m68k_denormal;
    uint64_t round_mask;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_re_bias    = (1 << (E - 1)) + (1 << (E - 2)),  \
    .exp_max        = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1
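
/*
 * For example, FLOAT_PARAMS(8, 23) -- the float32 layout below --
 * expands to:
 *   .exp_size = 8,   .exp_bias = 127, .exp_re_bias = 192,
 *   .exp_max  = 255, .frac_size = 23, .frac_shift = 40,
 *   .round_mask = (1ull << 40) - 1
 * so the 23-bit fraction sits just below DECOMPOSED_BINARY_POINT with
 * 40 round/sticky bits beneath it.
 */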

static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

#define FLOATX80_PARAMS(R)              \
    FLOAT_PARAMS_(15),                  \
    .frac_size = R == 64 ? 63 : R,      \
    .frac_shift = 0,                    \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
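
/*
 * E.g. FLOATX80_PARAMS(52) (double-precision rounding within the 64-bit
 * significand) yields frac_size = 52, frac_shift = 0 and
 * round_mask = (1ull << 11) - 1: the 11 significand bits below the
 * 52-bit fraction are round/sticky bits.
 */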

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = {
        FLOATX80_PARAMS(64),
#ifdef TARGET_M68K
        .m68k_denormal = true,
#endif
    },
};

/* Unpack a float to parts, but do not canonicalize.  */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}
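
/*
 * E.g. for float32 (f_size = 23, e_size = 8), the raw value 0x3fc00000
 * (1.5f) unpacks to sign = 0, exp = 0x7f (still biased) and
 * frac = 0x400000; canonicalization later rebases the exponent and
 * normalizes the fraction against DECOMPOSED_BINARY_POINT.
 */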

static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}

static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
{
    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract32(f.high, 15, 1),
        .exp = extract32(f.high, 0, 15),
        .frac_hi = f.low
    };
}

static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}

/* Pack a float from parts, but do not canonicalize.  */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}

static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}

static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}

/*----------------------------------------------------------------------------
| Functions and definitions to determine:  (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output.  These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"

#define PARTS_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME, \
                  FloatParts256 *: parts256_##NAME)
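
/*
 * _Generic dispatches on the pointer type at compile time, so e.g.
 * parts_default_nan(p, s) with a FloatParts64 *p resolves directly to
 * parts64_default_nan(p, s), with no runtime cost.
 */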

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
                                           FloatParts64 *c, int scale,
                                           int flags, float_status *s);
static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
                                             FloatParts128 *c, int scale,
                                             int flags, float_status *s);

#define parts_muladd_scalbn(A, B, C, Z, Y, S) \
    PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
                                    uint64_t *mod_quot, float_status *s);
static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
                                      uint64_t *mod_quot, float_status *s);

#define parts_modrem(A, B, Q, S) \
    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)

static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                      int scale, int64_t min, int64_t max,
                                      float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
                                            FloatRoundMode rmode,
                                            int bitsm1, float_status *s);
static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
                                             FloatRoundMode rmode,
                                             int bitsm1, float_status *s);

#define parts_float_to_sint_modulo(P, R, M, S) \
    PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
                                     float_status *s, bool q);
static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)

static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_log2(A, S, F) \
    PARTS_GENERIC_64_128(log2, A)(A, S, F)

/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME, \
                  FloatParts256 *: frac256_##NAME)

static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)

static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)

static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}

static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)

static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
{
    return (a->frac == b->frac ? float_relation_equal
            : a->frac < b->frac ? float_relation_less
            : float_relation_greater);
}

static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return float_relation_equal;
        }
    }
    return ta < tb ? float_relation_less : float_relation_greater;
}

#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)

static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)

static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}
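
/*
 * Worked example for frac64_div: dividing 1.5 by 1.0 has
 * a->frac = 0xC000000000000000 >= b->frac = 0x8000000000000000, so the
 * dividend is pre-shifted right by one; udiv_qrnnd then yields
 * q = 0xC000000000000000 with r = 0, i.e. exactly 1.5 with no exponent
 * adjustment (ret = false) and no sticky bit.
 */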

static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)

static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}

static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}

#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)

static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)

static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)

static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}

static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}

static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)

static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac;
    a1 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        a0 >>= 1;
        exp_diff = 0;
    }

    b0 = b->frac;
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    exp_diff -= 64;
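    /*
     * Develop 62 quotient bits per iteration.  estimateDiv128To64 may
     * overestimate the true quotient by as much as 2, so bias the
     * estimate low by 2 to keep the partial remainder non-negative.
     */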
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac = a0 | (a1 != 0);
}

static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    exp_diff -= 64;
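    /*
     * As above, but the estimate here also ignores b1 and so may exceed
     * the true quotient by up to 4; bias it low by 4 and develop 61
     * quotient bits per iteration.
     */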
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    a->frac_lo = a1 | (a2 != 0);
}

#define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)

static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)

static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)

static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}

static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}

static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)

static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)

static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)

static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)

1593 /*
1594  * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
1595  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1596  * and thus MIT licenced.
1597  */
1598 static const uint16_t rsqrt_tab[128] = {
1599     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1600     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1601     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1602     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1603     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1604     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1605     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1606     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1607     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1608     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1609     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1610     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1611     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1612     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1613     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1614     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1615 };
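/*
 * Editor's note (assumption about intended use): the 7-bit index (1
 * exponent bit, 6 mantissa bits) selects a 16-bit initial estimate of
 * the reciprocal square root, which the sqrt code can then sharpen with
 * Newton-Raphson steps of the form
 *
 *     r' = r * (3 - m * r * r) / 2
 *
 * each of which roughly doubles the number of correct bits.
 */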
1616 
1617 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1618 #define FloatPartsN    glue(FloatParts,N)
1619 #define FloatPartsW    glue(FloatParts,W)
1620 
1621 #define N 64
1622 #define W 128
1623 
1624 #include "softfloat-parts-addsub.c.inc"
1625 #include "softfloat-parts.c.inc"
1626 
1627 #undef  N
1628 #undef  W
1629 #define N 128
1630 #define W 256
1631 
1632 #include "softfloat-parts-addsub.c.inc"
1633 #include "softfloat-parts.c.inc"
1634 
1635 #undef  N
1636 #undef  W
1637 #define N            256
1638 
1639 #include "softfloat-parts-addsub.c.inc"
1640 
1641 #undef  N
1642 #undef  W
1643 #undef  partsN
1644 #undef  FloatPartsN
1645 #undef  FloatPartsW
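/*
 * Editor's illustration: the includes above stamp out one copy of the
 * generic algorithms per width.  With N=64 and W=128, for example,
 *
 *     partsN(addsub)  ->  parts64_addsub
 *     FloatPartsN     ->  FloatParts64
 *     FloatPartsW     ->  FloatParts128
 *
 * so the same .c.inc sources yield parts64_*, parts128_* and, for
 * addsub only, parts256_* implementations.
 */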
1646 
1647 /*
1648  * Pack/unpack routines with a specific FloatFmt.
1649  */
1650 
1651 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1652                                       float_status *s, const FloatFmt *params)
1653 {
1654     float16_unpack_raw(p, f);
1655     parts_canonicalize(p, s, params);
1656 }
1657 
1658 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1659                                      float_status *s)
1660 {
1661     float16a_unpack_canonical(p, f, s, &float16_params);
1662 }
1663 
1664 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1665                                       float_status *s)
1666 {
1667     bfloat16_unpack_raw(p, f);
1668     parts_canonicalize(p, s, &bfloat16_params);
1669 }
1670 
1671 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1672                                              float_status *s,
1673                                              const FloatFmt *params)
1674 {
1675     parts_uncanon(p, s, params);
1676     return float16_pack_raw(p);
1677 }
1678 
1679 static float16 float16_round_pack_canonical(FloatParts64 *p,
1680                                             float_status *s)
1681 {
1682     return float16a_round_pack_canonical(p, s, &float16_params);
1683 }
1684 
1685 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1686                                               float_status *s)
1687 {
1688     parts_uncanon(p, s, &bfloat16_params);
1689     return bfloat16_pack_raw(p);
1690 }
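/*
 * Editor's sketch of the common shape (not a new API): every public
 * operation below is built from the same three steps, e.g. for float16:
 *
 *     FloatParts64 p;
 *     float16_unpack_canonical(&p, in, status);        raw bits to parts
 *     ...operate on p with a parts_* helper...
 *     return float16_round_pack_canonical(&p, status); parts back to bits
 */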
1691 
1692 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1693                                      float_status *s)
1694 {
1695     float32_unpack_raw(p, f);
1696     parts_canonicalize(p, s, &float32_params);
1697 }
1698 
1699 static float32 float32_round_pack_canonical(FloatParts64 *p,
1700                                             float_status *s)
1701 {
1702     parts_uncanon(p, s, &float32_params);
1703     return float32_pack_raw(p);
1704 }
1705 
1706 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1707                                      float_status *s)
1708 {
1709     float64_unpack_raw(p, f);
1710     parts_canonicalize(p, s, &float64_params);
1711 }
1712 
1713 static float64 float64_round_pack_canonical(FloatParts64 *p,
1714                                             float_status *s)
1715 {
1716     parts_uncanon(p, s, &float64_params);
1717     return float64_pack_raw(p);
1718 }
1719 
1720 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1721                                                float_status *s)
1722 {
1723     parts_uncanon(p, s, &float32_params);
1724 
1725     /*
1726      * In parts_uncanon, we placed the fraction for float32 at the lsb.
1727      * We need to adjust the fraction higher so that the least N bits are
1728      * zero, and the fraction is adjacent to the float64 implicit bit.
1729      */
1730     switch (p->cls) {
1731     case float_class_normal:
1732         if (unlikely(p->exp == 0)) {
1733             /*
1734              * The result is denormal for float32, but can be represented
1735              * in normalized form for float64.  Adjust, per canonicalize.
1736              */
1737             int shift = frac_normalize(p);
1738             p->exp = (float32_params.frac_shift -
1739                       float32_params.exp_bias - shift + 1 +
1740                       float64_params.exp_bias);
1741             frac_shr(p, float64_params.frac_shift);
1742         } else {
1743             frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1744             p->exp += float64_params.exp_bias - float32_params.exp_bias;
1745         }
1746         break;
1747     case float_class_snan:
1748     case float_class_qnan:
1749         frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1750         p->exp = float64_params.exp_max;
1751         break;
1752     case float_class_inf:
1753         p->exp = float64_params.exp_max;
1754         break;
1755     case float_class_zero:
1756         break;
1757     default:
1758         g_assert_not_reached();
1759     }
1760 
1761     return float64_pack_raw(p);
1762 }
1763 
1764 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1765                                       float_status *s)
1766 {
1767     float128_unpack_raw(p, f);
1768     parts_canonicalize(p, s, &float128_params);
1769 }
1770 
1771 static float128 float128_round_pack_canonical(FloatParts128 *p,
1772                                               float_status *s)
1773 {
1774     parts_uncanon(p, s, &float128_params);
1775     return float128_pack_raw(p);
1776 }
1777 
1778 /* Returns false if the encoding is invalid. */
1779 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1780                                       float_status *s)
1781 {
1782     /* Ensure rounding precision is set before beginning. */
1783     switch (s->floatx80_rounding_precision) {
1784     case floatx80_precision_x:
1785     case floatx80_precision_d:
1786     case floatx80_precision_s:
1787         break;
1788     default:
1789         g_assert_not_reached();
1790     }
1791 
1792     if (unlikely(floatx80_invalid_encoding(f))) {
1793         float_raise(float_flag_invalid, s);
1794         return false;
1795     }
1796 
1797     floatx80_unpack_raw(p, f);
1798 
1799     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1800         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1801     } else {
1802         /* The explicit integer bit is ignored, after invalid checks. */
1803         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1804         p->cls = (p->frac_hi == 0 ? float_class_inf
1805                   : parts_is_snan_frac(p->frac_hi, s)
1806                   ? float_class_snan : float_class_qnan);
1807     }
1808     return true;
1809 }
1810 
1811 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1812                                               float_status *s)
1813 {
1814     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1815     uint64_t frac;
1816     int exp;
1817 
1818     switch (p->cls) {
1819     case float_class_normal:
1820         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1821             parts_uncanon_normal(p, s, fmt);
1822             frac = p->frac_hi;
1823             exp = p->exp;
1824         } else {
1825             FloatParts64 p64;
1826 
1827             p64.sign = p->sign;
1828             p64.exp = p->exp;
1829             frac_truncjam(&p64, p);
1830             parts_uncanon_normal(&p64, s, fmt);
1831             frac = p64.frac;
1832             exp = p64.exp;
1833         }
1834         if (exp != fmt->exp_max) {
1835             break;
1836         }
1837         /* rounded to inf -- fall through to set frac correctly */
1838 
1839     case float_class_inf:
1840         /* x86 and m68k differ in the setting of the integer bit. */
1841         frac = floatx80_infinity_low;
1842         exp = fmt->exp_max;
1843         break;
1844 
1845     case float_class_zero:
1846         frac = 0;
1847         exp = 0;
1848         break;
1849 
1850     case float_class_snan:
1851     case float_class_qnan:
1852         /* NaNs have the integer bit set. */
1853         frac = p->frac_hi | (1ull << 63);
1854         exp = fmt->exp_max;
1855         break;
1856 
1857     default:
1858         g_assert_not_reached();
1859     }
1860 
1861     return packFloatx80(p->sign, exp, frac);
1862 }
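/*
 * Editor's note (assumption, mirroring x87 precision control): the
 * floatx80_rounding_precision field selects a 64-bit (precision_x),
 * 53-bit (precision_d) or 24-bit (precision_s) significand; the
 * narrower modes round through a FloatParts64 above before repacking
 * into the 80-bit format.
 */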
1863 
1864 /*
1865  * Addition and subtraction
1866  */
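/*
 * Editor's usage sketch (hypothetical helper, not part of the original
 * file): callers drive these entry points through a float_status, which
 * carries the rounding mode in and accumulates exception flags out.
 */
static inline float32 example_float32_sum(float32 a, float32 b)
{
    float_status st = { };  /* zero-init: round-to-nearest-even, no flags */
    float32 r = float32_add(a, b, &st);
    /* st.float_exception_flags may now include float_flag_inexact. */
    return r;
}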
1867 
1868 static float16 QEMU_FLATTEN
1869 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1870 {
1871     FloatParts64 pa, pb, *pr;
1872 
1873     float16_unpack_canonical(&pa, a, status);
1874     float16_unpack_canonical(&pb, b, status);
1875     pr = parts_addsub(&pa, &pb, status, subtract);
1876 
1877     return float16_round_pack_canonical(pr, status);
1878 }
1879 
1880 float16 float16_add(float16 a, float16 b, float_status *status)
1881 {
1882     return float16_addsub(a, b, status, false);
1883 }
1884 
1885 float16 float16_sub(float16 a, float16 b, float_status *status)
1886 {
1887     return float16_addsub(a, b, status, true);
1888 }
1889 
1890 static float32 QEMU_SOFTFLOAT_ATTR
1891 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1892 {
1893     FloatParts64 pa, pb, *pr;
1894 
1895     float32_unpack_canonical(&pa, a, status);
1896     float32_unpack_canonical(&pb, b, status);
1897     pr = parts_addsub(&pa, &pb, status, subtract);
1898 
1899     return float32_round_pack_canonical(pr, status);
1900 }
1901 
1902 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1903 {
1904     return soft_f32_addsub(a, b, status, false);
1905 }
1906 
1907 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1908 {
1909     return soft_f32_addsub(a, b, status, true);
1910 }
1911 
1912 static float64 QEMU_SOFTFLOAT_ATTR
1913 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1914 {
1915     FloatParts64 pa, pb, *pr;
1916 
1917     float64_unpack_canonical(&pa, a, status);
1918     float64_unpack_canonical(&pb, b, status);
1919     pr = parts_addsub(&pa, &pb, status, subtract);
1920 
1921     return float64_round_pack_canonical(pr, status);
1922 }
1923 
1924 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1925 {
1926     return soft_f64_addsub(a, b, status, false);
1927 }
1928 
1929 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1930 {
1931     return soft_f64_addsub(a, b, status, true);
1932 }
1933 
1934 static float hard_f32_add(float a, float b)
1935 {
1936     return a + b;
1937 }
1938 
1939 static float hard_f32_sub(float a, float b)
1940 {
1941     return a - b;
1942 }
1943 
1944 static double hard_f64_add(double a, double b)
1945 {
1946     return a + b;
1947 }
1948 
1949 static double hard_f64_sub(double a, double b)
1950 {
1951     return a - b;
1952 }
1953 
1954 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1955 {
1956     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1957         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1958     }
1959     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1960 }
1961 
1962 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1963 {
1964     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1965         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1966     } else {
1967         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1968     }
1969 }
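/*
 * Editor's note (my reading of the float*_gen2 protocol defined earlier
 * in this file): the "post" predicate is consulted only when the
 * hardware result is tiny, and returning true reruns the operation in
 * softfloat so that underflow is flagged correctly.  The one tiny case
 * that needs no rerun is both operands being zero: the result is then
 * an exact zero and no underflow can have occurred.
 */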
1970 
1971 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1972                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1973 {
1974     return float32_gen2(a, b, s, hard, soft,
1975                         f32_is_zon2, f32_addsubmul_post);
1976 }
1977 
1978 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1979                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1980 {
1981     return float64_gen2(a, b, s, hard, soft,
1982                         f64_is_zon2, f64_addsubmul_post);
1983 }
1984 
1985 float32 QEMU_FLATTEN
1986 float32_add(float32 a, float32 b, float_status *s)
1987 {
1988     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1989 }
1990 
1991 float32 QEMU_FLATTEN
1992 float32_sub(float32 a, float32 b, float_status *s)
1993 {
1994     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1995 }
1996 
1997 float64 QEMU_FLATTEN
1998 float64_add(float64 a, float64 b, float_status *s)
1999 {
2000     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
2001 }
2002 
2003 float64 QEMU_FLATTEN
2004 float64_sub(float64 a, float64 b, float_status *s)
2005 {
2006     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
2007 }
2008 
2009 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
2010                                  bool subtract)
2011 {
2012     FloatParts64 pa, pb, *pr;
2013 
2014     float64_unpack_canonical(&pa, a, status);
2015     float64_unpack_canonical(&pb, b, status);
2016     pr = parts_addsub(&pa, &pb, status, subtract);
2017 
2018     return float64r32_round_pack_canonical(pr, status);
2019 }
2020 
2021 float64 float64r32_add(float64 a, float64 b, float_status *status)
2022 {
2023     return float64r32_addsub(a, b, status, false);
2024 }
2025 
2026 float64 float64r32_sub(float64 a, float64 b, float_status *status)
2027 {
2028     return float64r32_addsub(a, b, status, true);
2029 }
2030 
2031 static bfloat16 QEMU_FLATTEN
2032 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2033 {
2034     FloatParts64 pa, pb, *pr;
2035 
2036     bfloat16_unpack_canonical(&pa, a, status);
2037     bfloat16_unpack_canonical(&pb, b, status);
2038     pr = parts_addsub(&pa, &pb, status, subtract);
2039 
2040     return bfloat16_round_pack_canonical(pr, status);
2041 }
2042 
2043 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2044 {
2045     return bfloat16_addsub(a, b, status, false);
2046 }
2047 
2048 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2049 {
2050     return bfloat16_addsub(a, b, status, true);
2051 }
2052 
2053 static float128 QEMU_FLATTEN
2054 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2055 {
2056     FloatParts128 pa, pb, *pr;
2057 
2058     float128_unpack_canonical(&pa, a, status);
2059     float128_unpack_canonical(&pb, b, status);
2060     pr = parts_addsub(&pa, &pb, status, subtract);
2061 
2062     return float128_round_pack_canonical(pr, status);
2063 }
2064 
2065 float128 float128_add(float128 a, float128 b, float_status *status)
2066 {
2067     return float128_addsub(a, b, status, false);
2068 }
2069 
2070 float128 float128_sub(float128 a, float128 b, float_status *status)
2071 {
2072     return float128_addsub(a, b, status, true);
2073 }
2074 
2075 static floatx80 QEMU_FLATTEN
2076 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2077 {
2078     FloatParts128 pa, pb, *pr;
2079 
2080     if (!floatx80_unpack_canonical(&pa, a, status) ||
2081         !floatx80_unpack_canonical(&pb, b, status)) {
2082         return floatx80_default_nan(status);
2083     }
2084 
2085     pr = parts_addsub(&pa, &pb, status, subtract);
2086     return floatx80_round_pack_canonical(pr, status);
2087 }
2088 
2089 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2090 {
2091     return floatx80_addsub(a, b, status, false);
2092 }
2093 
2094 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2095 {
2096     return floatx80_addsub(a, b, status, true);
2097 }
2098 
2099 /*
2100  * Multiplication
2101  */
2102 
2103 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2104 {
2105     FloatParts64 pa, pb, *pr;
2106 
2107     float16_unpack_canonical(&pa, a, status);
2108     float16_unpack_canonical(&pb, b, status);
2109     pr = parts_mul(&pa, &pb, status);
2110 
2111     return float16_round_pack_canonical(pr, status);
2112 }
2113 
2114 static float32 QEMU_SOFTFLOAT_ATTR
2115 soft_f32_mul(float32 a, float32 b, float_status *status)
2116 {
2117     FloatParts64 pa, pb, *pr;
2118 
2119     float32_unpack_canonical(&pa, a, status);
2120     float32_unpack_canonical(&pb, b, status);
2121     pr = parts_mul(&pa, &pb, status);
2122 
2123     return float32_round_pack_canonical(pr, status);
2124 }
2125 
2126 static float64 QEMU_SOFTFLOAT_ATTR
2127 soft_f64_mul(float64 a, float64 b, float_status *status)
2128 {
2129     FloatParts64 pa, pb, *pr;
2130 
2131     float64_unpack_canonical(&pa, a, status);
2132     float64_unpack_canonical(&pb, b, status);
2133     pr = parts_mul(&pa, &pb, status);
2134 
2135     return float64_round_pack_canonical(pr, status);
2136 }
2137 
2138 static float hard_f32_mul(float a, float b)
2139 {
2140     return a * b;
2141 }
2142 
2143 static double hard_f64_mul(double a, double b)
2144 {
2145     return a * b;
2146 }
2147 
2148 float32 QEMU_FLATTEN
2149 float32_mul(float32 a, float32 b, float_status *s)
2150 {
2151     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2152                         f32_is_zon2, f32_addsubmul_post);
2153 }
2154 
2155 float64 QEMU_FLATTEN
2156 float64_mul(float64 a, float64 b, float_status *s)
2157 {
2158     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2159                         f64_is_zon2, f64_addsubmul_post);
2160 }
2161 
2162 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2163 {
2164     FloatParts64 pa, pb, *pr;
2165 
2166     float64_unpack_canonical(&pa, a, status);
2167     float64_unpack_canonical(&pb, b, status);
2168     pr = parts_mul(&pa, &pb, status);
2169 
2170     return float64r32_round_pack_canonical(pr, status);
2171 }
2172 
2173 bfloat16 QEMU_FLATTEN
2174 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2175 {
2176     FloatParts64 pa, pb, *pr;
2177 
2178     bfloat16_unpack_canonical(&pa, a, status);
2179     bfloat16_unpack_canonical(&pb, b, status);
2180     pr = parts_mul(&pa, &pb, status);
2181 
2182     return bfloat16_round_pack_canonical(pr, status);
2183 }
2184 
2185 float128 QEMU_FLATTEN
2186 float128_mul(float128 a, float128 b, float_status *status)
2187 {
2188     FloatParts128 pa, pb, *pr;
2189 
2190     float128_unpack_canonical(&pa, a, status);
2191     float128_unpack_canonical(&pb, b, status);
2192     pr = parts_mul(&pa, &pb, status);
2193 
2194     return float128_round_pack_canonical(pr, status);
2195 }
2196 
2197 floatx80 QEMU_FLATTEN
2198 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2199 {
2200     FloatParts128 pa, pb, *pr;
2201 
2202     if (!floatx80_unpack_canonical(&pa, a, status) ||
2203         !floatx80_unpack_canonical(&pb, b, status)) {
2204         return floatx80_default_nan(status);
2205     }
2206 
2207     pr = parts_mul(&pa, &pb, status);
2208     return floatx80_round_pack_canonical(pr, status);
2209 }
2210 
2211 /*
2212  * Fused multiply-add
2213  */
2214 
2215 float16 QEMU_FLATTEN
2216 float16_muladd_scalbn(float16 a, float16 b, float16 c,
2217                       int scale, int flags, float_status *status)
2218 {
2219     FloatParts64 pa, pb, pc, *pr;
2220 
2221     float16_unpack_canonical(&pa, a, status);
2222     float16_unpack_canonical(&pb, b, status);
2223     float16_unpack_canonical(&pc, c, status);
2224     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2225 
2226     return float16_round_pack_canonical(pr, status);
2227 }
2228 
2229 float16 float16_muladd(float16 a, float16 b, float16 c,
2230                        int flags, float_status *status)
2231 {
2232     return float16_muladd_scalbn(a, b, c, 0, flags, status);
2233 }
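/*
 * Editor's usage sketch (hypothetical helper): the 'flags' argument
 * selects negations applied before the single rounding step, so that
 * e.g. c - a * b is computed with only one rounding:
 */
static inline float32 example_float32_nmadd(float32 a, float32 b, float32 c,
                                            float_status *st)
{
    /* float_muladd_negate_product yields (-(a * b)) + c, still fused. */
    return float32_muladd(a, b, c, float_muladd_negate_product, st);
}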
2234 
2235 float32 QEMU_SOFTFLOAT_ATTR
2236 float32_muladd_scalbn(float32 a, float32 b, float32 c,
2237                       int scale, int flags, float_status *status)
2238 {
2239     FloatParts64 pa, pb, pc, *pr;
2240 
2241     float32_unpack_canonical(&pa, a, status);
2242     float32_unpack_canonical(&pb, b, status);
2243     float32_unpack_canonical(&pc, c, status);
2244     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2245 
2246     return float32_round_pack_canonical(pr, status);
2247 }
2248 
2249 float64 QEMU_SOFTFLOAT_ATTR
2250 float64_muladd_scalbn(float64 a, float64 b, float64 c,
2251                       int scale, int flags, float_status *status)
2252 {
2253     FloatParts64 pa, pb, pc, *pr;
2254 
2255     float64_unpack_canonical(&pa, a, status);
2256     float64_unpack_canonical(&pb, b, status);
2257     float64_unpack_canonical(&pc, c, status);
2258     pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
2259 
2260     return float64_round_pack_canonical(pr, status);
2261 }
2262 
2263 static bool force_soft_fma;
2264 
2265 float32 QEMU_FLATTEN
2266 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2267 {
2268     union_float32 ua, ub, uc, ur;
2269 
2270     ua.s = xa;
2271     ub.s = xb;
2272     uc.s = xc;
2273 
2274     if (unlikely(!can_use_fpu(s))) {
2275         goto soft;
2276     }
2277     if (unlikely(flags & float_muladd_suppress_add_product_zero)) {
2278         goto soft;
2279     }
2280 
2281     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2282     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2283         goto soft;
2284     }
2285 
2286     if (unlikely(force_soft_fma)) {
2287         goto soft;
2288     }
2289 
2290     /*
2291      * When either a or b is zero, no underflow or overflow check is
2292      * needed: the addend is known to be (normal || 0) and the product is 0.
2293      */
2294     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2295         union_float32 up;
2296         bool prod_sign;
2297 
2298         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2299         prod_sign ^= !!(flags & float_muladd_negate_product);
2300         up.s = float32_set_sign(float32_zero, prod_sign);
2301 
2302         if (flags & float_muladd_negate_c) {
2303             uc.h = -uc.h;
2304         }
2305         ur.h = up.h + uc.h;
2306     } else {
2307         union_float32 ua_orig = ua;
2308         union_float32 uc_orig = uc;
2309 
2310         if (flags & float_muladd_negate_product) {
2311             ua.h = -ua.h;
2312         }
2313         if (flags & float_muladd_negate_c) {
2314             uc.h = -uc.h;
2315         }
2316 
2317         ur.h = fmaf(ua.h, ub.h, uc.h);
2318 
2319         if (unlikely(f32_is_inf(ur))) {
2320             float_raise(float_flag_overflow, s);
2321         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2322             ua = ua_orig;
2323             uc = uc_orig;
2324             goto soft;
2325         }
2326     }
2327     if (flags & float_muladd_negate_result) {
2328         return float32_chs(ur.s);
2329     }
2330     return ur.s;
2331 
2332  soft:
2333     return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
2334 }
2335 
2336 float64 QEMU_FLATTEN
2337 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2338 {
2339     union_float64 ua, ub, uc, ur;
2340 
2341     ua.s = xa;
2342     ub.s = xb;
2343     uc.s = xc;
2344 
2345     if (unlikely(!can_use_fpu(s))) {
2346         goto soft;
2347     }
2348 
2349     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2350     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2351         goto soft;
2352     }
2353 
2354     if (unlikely(force_soft_fma)) {
2355         goto soft;
2356     }
2357 
2358     /*
2359      * When either a or b is zero, no underflow or overflow check is
2360      * needed: the addend is known to be (normal || 0) and the product is 0.
2361      */
2362     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2363         union_float64 up;
2364         bool prod_sign;
2365 
2366         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2367         prod_sign ^= !!(flags & float_muladd_negate_product);
2368         up.s = float64_set_sign(float64_zero, prod_sign);
2369 
2370         if (flags & float_muladd_negate_c) {
2371             uc.h = -uc.h;
2372         }
2373         ur.h = up.h + uc.h;
2374     } else {
2375         union_float64 ua_orig = ua;
2376         union_float64 uc_orig = uc;
2377 
2378         if (flags & float_muladd_negate_product) {
2379             ua.h = -ua.h;
2380         }
2381         if (flags & float_muladd_negate_c) {
2382             uc.h = -uc.h;
2383         }
2384 
2385         ur.h = fma(ua.h, ub.h, uc.h);
2386 
2387         if (unlikely(f64_is_inf(ur))) {
2388             float_raise(float_flag_overflow, s);
2389         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2390             ua = ua_orig;
2391             uc = uc_orig;
2392             goto soft;
2393         }
2394     }
2395     if (flags & float_muladd_negate_result) {
2396         return float64_chs(ur.s);
2397     }
2398     return ur.s;
2399 
2400  soft:
2401     return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
2402 }
2403 
2404 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2405                           int flags, float_status *status)
2406 {
2407     FloatParts64 pa, pb, pc, *pr;
2408 
2409     float64_unpack_canonical(&pa, a, status);
2410     float64_unpack_canonical(&pb, b, status);
2411     float64_unpack_canonical(&pc, c, status);
2412     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2413 
2414     return float64r32_round_pack_canonical(pr, status);
2415 }
2416 
2417 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2418                                       int flags, float_status *status)
2419 {
2420     FloatParts64 pa, pb, pc, *pr;
2421 
2422     bfloat16_unpack_canonical(&pa, a, status);
2423     bfloat16_unpack_canonical(&pb, b, status);
2424     bfloat16_unpack_canonical(&pc, c, status);
2425     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2426 
2427     return bfloat16_round_pack_canonical(pr, status);
2428 }
2429 
2430 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2431                                       int flags, float_status *status)
2432 {
2433     FloatParts128 pa, pb, pc, *pr;
2434 
2435     float128_unpack_canonical(&pa, a, status);
2436     float128_unpack_canonical(&pb, b, status);
2437     float128_unpack_canonical(&pc, c, status);
2438     pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
2439 
2440     return float128_round_pack_canonical(pr, status);
2441 }
2442 
2443 /*
2444  * Division
2445  */
2446 
2447 float16 float16_div(float16 a, float16 b, float_status *status)
2448 {
2449     FloatParts64 pa, pb, *pr;
2450 
2451     float16_unpack_canonical(&pa, a, status);
2452     float16_unpack_canonical(&pb, b, status);
2453     pr = parts_div(&pa, &pb, status);
2454 
2455     return float16_round_pack_canonical(pr, status);
2456 }
2457 
2458 static float32 QEMU_SOFTFLOAT_ATTR
2459 soft_f32_div(float32 a, float32 b, float_status *status)
2460 {
2461     FloatParts64 pa, pb, *pr;
2462 
2463     float32_unpack_canonical(&pa, a, status);
2464     float32_unpack_canonical(&pb, b, status);
2465     pr = parts_div(&pa, &pb, status);
2466 
2467     return float32_round_pack_canonical(pr, status);
2468 }
2469 
2470 static float64 QEMU_SOFTFLOAT_ATTR
2471 soft_f64_div(float64 a, float64 b, float_status *status)
2472 {
2473     FloatParts64 pa, pb, *pr;
2474 
2475     float64_unpack_canonical(&pa, a, status);
2476     float64_unpack_canonical(&pb, b, status);
2477     pr = parts_div(&pa, &pb, status);
2478 
2479     return float64_round_pack_canonical(pr, status);
2480 }
2481 
2482 static float hard_f32_div(float a, float b)
2483 {
2484     return a / b;
2485 }
2486 
2487 static double hard_f64_div(double a, double b)
2488 {
2489     return a / b;
2490 }
2491 
2492 static bool f32_div_pre(union_float32 a, union_float32 b)
2493 {
2494     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2495         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2496                fpclassify(b.h) == FP_NORMAL;
2497     }
2498     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2499 }
2500 
2501 static bool f64_div_pre(union_float64 a, union_float64 b)
2502 {
2503     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2504         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2505                fpclassify(b.h) == FP_NORMAL;
2506     }
2507     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2508 }
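/*
 * Editor's note (my reading): the "pre" predicates above admit only a
 * zero-or-normal dividend and a strictly normal divisor, so division by
 * zero, NaN or infinity operands, and subnormal inputs all take the
 * soft path, where the exception flags are raised precisely.
 */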
2509 
2510 static bool f32_div_post(union_float32 a, union_float32 b)
2511 {
2512     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2513         return fpclassify(a.h) != FP_ZERO;
2514     }
2515     return !float32_is_zero(a.s);
2516 }
2517 
2518 static bool f64_div_post(union_float64 a, union_float64 b)
2519 {
2520     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2521         return fpclassify(a.h) != FP_ZERO;
2522     }
2523     return !float64_is_zero(a.s);
2524 }
2525 
2526 float32 QEMU_FLATTEN
2527 float32_div(float32 a, float32 b, float_status *s)
2528 {
2529     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2530                         f32_div_pre, f32_div_post);
2531 }
2532 
2533 float64 QEMU_FLATTEN
2534 float64_div(float64 a, float64 b, float_status *s)
2535 {
2536     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2537                         f64_div_pre, f64_div_post);
2538 }
2539 
2540 float64 float64r32_div(float64 a, float64 b, float_status *status)
2541 {
2542     FloatParts64 pa, pb, *pr;
2543 
2544     float64_unpack_canonical(&pa, a, status);
2545     float64_unpack_canonical(&pb, b, status);
2546     pr = parts_div(&pa, &pb, status);
2547 
2548     return float64r32_round_pack_canonical(pr, status);
2549 }
2550 
2551 bfloat16 QEMU_FLATTEN
2552 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2553 {
2554     FloatParts64 pa, pb, *pr;
2555 
2556     bfloat16_unpack_canonical(&pa, a, status);
2557     bfloat16_unpack_canonical(&pb, b, status);
2558     pr = parts_div(&pa, &pb, status);
2559 
2560     return bfloat16_round_pack_canonical(pr, status);
2561 }
2562 
2563 float128 QEMU_FLATTEN
2564 float128_div(float128 a, float128 b, float_status *status)
2565 {
2566     FloatParts128 pa, pb, *pr;
2567 
2568     float128_unpack_canonical(&pa, a, status);
2569     float128_unpack_canonical(&pb, b, status);
2570     pr = parts_div(&pa, &pb, status);
2571 
2572     return float128_round_pack_canonical(pr, status);
2573 }
2574 
2575 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2576 {
2577     FloatParts128 pa, pb, *pr;
2578 
2579     if (!floatx80_unpack_canonical(&pa, a, status) ||
2580         !floatx80_unpack_canonical(&pb, b, status)) {
2581         return floatx80_default_nan(status);
2582     }
2583 
2584     pr = parts_div(&pa, &pb, status);
2585     return floatx80_round_pack_canonical(pr, status);
2586 }
2587 
2588 /*
2589  * Remainder
2590  */
2591 
2592 float32 float32_rem(float32 a, float32 b, float_status *status)
2593 {
2594     FloatParts64 pa, pb, *pr;
2595 
2596     float32_unpack_canonical(&pa, a, status);
2597     float32_unpack_canonical(&pb, b, status);
2598     pr = parts_modrem(&pa, &pb, NULL, status);
2599 
2600     return float32_round_pack_canonical(pr, status);
2601 }
2602 
2603 float64 float64_rem(float64 a, float64 b, float_status *status)
2604 {
2605     FloatParts64 pa, pb, *pr;
2606 
2607     float64_unpack_canonical(&pa, a, status);
2608     float64_unpack_canonical(&pb, b, status);
2609     pr = parts_modrem(&pa, &pb, NULL, status);
2610 
2611     return float64_round_pack_canonical(pr, status);
2612 }
2613 
2614 float128 float128_rem(float128 a, float128 b, float_status *status)
2615 {
2616     FloatParts128 pa, pb, *pr;
2617 
2618     float128_unpack_canonical(&pa, a, status);
2619     float128_unpack_canonical(&pb, b, status);
2620     pr = parts_modrem(&pa, &pb, NULL, status);
2621 
2622     return float128_round_pack_canonical(pr, status);
2623 }
2624 
2625 /*
2626  * Returns the remainder of the extended double-precision floating-point value
2627  * `a' with respect to the corresponding value `b'.
2628  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2629  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, the
2630  * remainder is instead computed by truncating the quotient toward zero,
2631  * and *quotient is set to the low 64 bits of the absolute value of the
2632  * integer quotient.
2633  */
2634 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2635                          uint64_t *quotient, float_status *status)
2636 {
2637     FloatParts128 pa, pb, *pr;
2638 
2639     *quotient = 0;
2640     if (!floatx80_unpack_canonical(&pa, a, status) ||
2641         !floatx80_unpack_canonical(&pb, b, status)) {
2642         return floatx80_default_nan(status);
2643     }
2644     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2645 
2646     return floatx80_round_pack_canonical(pr, status);
2647 }
2648 
2649 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2650 {
2651     uint64_t quotient;
2652     return floatx80_modrem(a, b, false, &quotient, status);
2653 }
2654 
2655 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2656 {
2657     uint64_t quotient;
2658     return floatx80_modrem(a, b, true, &quotient, status);
2659 }
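/*
 * Editor's worked example of the two quotient conventions, for
 * a = 5.5 and b = 2.0:
 *
 *     floatx80_rem: quotient rounded to nearest (2.75 -> 3),
 *                   remainder = 5.5 - 3 * 2.0 = -0.5
 *     floatx80_mod: quotient truncated toward zero (2.75 -> 2),
 *                   remainder = 5.5 - 2 * 2.0 = +1.5
 */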
2660 
2661 /*
2662  * Float to Float conversions
2663  *
2664  * Returns the result of converting one float format to another. The
2665  * conversion is performed according to the IEC/IEEE Standard for
2666  * Binary Floating-Point Arithmetic.
2667  *
2668  * Usually this only needs to take care of raising invalid exceptions
2669  * and handling NaN conversions.
2670  */
2671 
2672 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2673 {
2674     switch (a->cls) {
2675     case float_class_snan:
2676         float_raise(float_flag_invalid_snan, s);
2677         /* fall through */
2678     case float_class_qnan:
2679         /*
2680          * There is no NaN in the destination format.  Raise Invalid
2681          * and return a zero with the sign of the input NaN.
2682          */
2683         float_raise(float_flag_invalid, s);
2684         a->cls = float_class_zero;
2685         break;
2686 
2687     case float_class_inf:
2688         /*
2689          * There is no Inf in the destination format.  Raise Invalid
2690          * and return the maximum normal with the correct sign.
2691          */
2692         float_raise(float_flag_invalid, s);
2693         a->cls = float_class_normal;
2694         a->exp = float16_params_ahp.exp_max;
2695         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2696                                   float16_params_ahp.frac_size + 1);
2697         break;
2698 
2699     case float_class_normal:
2700     case float_class_zero:
2701         break;
2702 
2703     default:
2704         g_assert_not_reached();
2705     }
2706 }
2707 
2708 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2709 {
2710     if (is_nan(a->cls)) {
2711         parts_return_nan(a, s);
2712     }
2713 }
2714 
2715 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2716 {
2717     if (is_nan(a->cls)) {
2718         parts_return_nan(a, s);
2719     }
2720 }
2721 
2722 #define parts_float_to_float(P, S) \
2723     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2724 
2725 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2726                                         float_status *s)
2727 {
2728     a->cls = b->cls;
2729     a->sign = b->sign;
2730     a->exp = b->exp;
2731 
2732     if (a->cls == float_class_normal) {
2733         frac_truncjam(a, b);
2734     } else if (is_nan(a->cls)) {
2735         /* Discard the low bits of the NaN. */
2736         a->frac = b->frac_hi;
2737         parts_return_nan(a, s);
2738     }
2739 }
2740 
2741 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2742                                        float_status *s)
2743 {
2744     a->cls = b->cls;
2745     a->sign = b->sign;
2746     a->exp = b->exp;
2747     frac_widen(a, b);
2748 
2749     if (is_nan(a->cls)) {
2750         parts_return_nan(a, s);
2751     }
2752 }
2753 
2754 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2755 {
2756     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2757     FloatParts64 p;
2758 
2759     float16a_unpack_canonical(&p, a, s, fmt16);
2760     parts_float_to_float(&p, s);
2761     return float32_round_pack_canonical(&p, s);
2762 }
2763 
2764 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2765 {
2766     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2767     FloatParts64 p;
2768 
2769     float16a_unpack_canonical(&p, a, s, fmt16);
2770     parts_float_to_float(&p, s);
2771     return float64_round_pack_canonical(&p, s);
2772 }
2773 
2774 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2775 {
2776     FloatParts64 p;
2777     const FloatFmt *fmt;
2778 
2779     float32_unpack_canonical(&p, a, s);
2780     if (ieee) {
2781         parts_float_to_float(&p, s);
2782         fmt = &float16_params;
2783     } else {
2784         parts_float_to_ahp(&p, s);
2785         fmt = &float16_params_ahp;
2786     }
2787     return float16a_round_pack_canonical(&p, s, fmt);
2788 }
2789 
2790 static float64 QEMU_SOFTFLOAT_ATTR
2791 soft_float32_to_float64(float32 a, float_status *s)
2792 {
2793     FloatParts64 p;
2794 
2795     float32_unpack_canonical(&p, a, s);
2796     parts_float_to_float(&p, s);
2797     return float64_round_pack_canonical(&p, s);
2798 }
2799 
2800 float64 float32_to_float64(float32 a, float_status *s)
2801 {
2802     if (likely(float32_is_normal(a))) {
2803         /* Widening conversion can never produce inexact results.  */
2804         union_float32 uf;
2805         union_float64 ud;
2806         uf.s = a;
2807         ud.h = uf.h;
2808         return ud.s;
2809     } else if (float32_is_zero(a)) {
2810         return float64_set_sign(float64_zero, float32_is_neg(a));
2811     } else {
2812         return soft_float32_to_float64(a, s);
2813     }
2814 }
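/*
 * Editor's note: the plain assignment above is exact because float64 has
 * a wider exponent range and a superset of float32's precision, so no
 * rounding can occur; non-normal inputs (zeros, subnormals, infinities,
 * NaNs) fall through to the other branches.
 */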
2815 
2816 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2817 {
2818     FloatParts64 p;
2819     const FloatFmt *fmt;
2820 
2821     float64_unpack_canonical(&p, a, s);
2822     if (ieee) {
2823         parts_float_to_float(&p, s);
2824         fmt = &float16_params;
2825     } else {
2826         parts_float_to_ahp(&p, s);
2827         fmt = &float16_params_ahp;
2828     }
2829     return float16a_round_pack_canonical(&p, s, fmt);
2830 }
2831 
2832 float32 float64_to_float32(float64 a, float_status *s)
2833 {
2834     FloatParts64 p;
2835 
2836     float64_unpack_canonical(&p, a, s);
2837     parts_float_to_float(&p, s);
2838     return float32_round_pack_canonical(&p, s);
2839 }
2840 
2841 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2842 {
2843     FloatParts64 p;
2844 
2845     bfloat16_unpack_canonical(&p, a, s);
2846     parts_float_to_float(&p, s);
2847     return float32_round_pack_canonical(&p, s);
2848 }
2849 
2850 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2851 {
2852     FloatParts64 p;
2853 
2854     bfloat16_unpack_canonical(&p, a, s);
2855     parts_float_to_float(&p, s);
2856     return float64_round_pack_canonical(&p, s);
2857 }
2858 
2859 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2860 {
2861     FloatParts64 p;
2862 
2863     float32_unpack_canonical(&p, a, s);
2864     parts_float_to_float(&p, s);
2865     return bfloat16_round_pack_canonical(&p, s);
2866 }
2867 
2868 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2869 {
2870     FloatParts64 p;
2871 
2872     float64_unpack_canonical(&p, a, s);
2873     parts_float_to_float(&p, s);
2874     return bfloat16_round_pack_canonical(&p, s);
2875 }
2876 
2877 float32 float128_to_float32(float128 a, float_status *s)
2878 {
2879     FloatParts64 p64;
2880     FloatParts128 p128;
2881 
2882     float128_unpack_canonical(&p128, a, s);
2883     parts_float_to_float_narrow(&p64, &p128, s);
2884     return float32_round_pack_canonical(&p64, s);
2885 }
2886 
2887 float64 float128_to_float64(float128 a, float_status *s)
2888 {
2889     FloatParts64 p64;
2890     FloatParts128 p128;
2891 
2892     float128_unpack_canonical(&p128, a, s);
2893     parts_float_to_float_narrow(&p64, &p128, s);
2894     return float64_round_pack_canonical(&p64, s);
2895 }
2896 
2897 float128 float32_to_float128(float32 a, float_status *s)
2898 {
2899     FloatParts64 p64;
2900     FloatParts128 p128;
2901 
2902     float32_unpack_canonical(&p64, a, s);
2903     parts_float_to_float_widen(&p128, &p64, s);
2904     return float128_round_pack_canonical(&p128, s);
2905 }
2906 
2907 float128 float64_to_float128(float64 a, float_status *s)
2908 {
2909     FloatParts64 p64;
2910     FloatParts128 p128;
2911 
2912     float64_unpack_canonical(&p64, a, s);
2913     parts_float_to_float_widen(&p128, &p64, s);
2914     return float128_round_pack_canonical(&p128, s);
2915 }
2916 
2917 float32 floatx80_to_float32(floatx80 a, float_status *s)
2918 {
2919     FloatParts64 p64;
2920     FloatParts128 p128;
2921 
2922     if (floatx80_unpack_canonical(&p128, a, s)) {
2923         parts_float_to_float_narrow(&p64, &p128, s);
2924     } else {
2925         parts_default_nan(&p64, s);
2926     }
2927     return float32_round_pack_canonical(&p64, s);
2928 }
2929 
2930 float64 floatx80_to_float64(floatx80 a, float_status *s)
2931 {
2932     FloatParts64 p64;
2933     FloatParts128 p128;
2934 
2935     if (floatx80_unpack_canonical(&p128, a, s)) {
2936         parts_float_to_float_narrow(&p64, &p128, s);
2937     } else {
2938         parts_default_nan(&p64, s);
2939     }
2940     return float64_round_pack_canonical(&p64, s);
2941 }
2942 
2943 float128 floatx80_to_float128(floatx80 a, float_status *s)
2944 {
2945     FloatParts128 p;
2946 
2947     if (floatx80_unpack_canonical(&p, a, s)) {
2948         parts_float_to_float(&p, s);
2949     } else {
2950         parts_default_nan(&p, s);
2951     }
2952     return float128_round_pack_canonical(&p, s);
2953 }
2954 
2955 floatx80 float32_to_floatx80(float32 a, float_status *s)
2956 {
2957     FloatParts64 p64;
2958     FloatParts128 p128;
2959 
2960     float32_unpack_canonical(&p64, a, s);
2961     parts_float_to_float_widen(&p128, &p64, s);
2962     return floatx80_round_pack_canonical(&p128, s);
2963 }
2964 
2965 floatx80 float64_to_floatx80(float64 a, float_status *s)
2966 {
2967     FloatParts64 p64;
2968     FloatParts128 p128;
2969 
2970     float64_unpack_canonical(&p64, a, s);
2971     parts_float_to_float_widen(&p128, &p64, s);
2972     return floatx80_round_pack_canonical(&p128, s);
2973 }
2974 
2975 floatx80 float128_to_floatx80(float128 a, float_status *s)
2976 {
2977     FloatParts128 p;
2978 
2979     float128_unpack_canonical(&p, a, s);
2980     parts_float_to_float(&p, s);
2981     return floatx80_round_pack_canonical(&p, s);
2982 }
2983 
2984 /*
2985  * Round to integral value
2986  */
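/*
 * Editor's worked example: these honor the dynamic rounding mode held in
 * float_status.  Rounding 1.25 to an integral value gives:
 *
 *     float_round_nearest_even -> 1.0
 *     float_round_up           -> 2.0
 *     float_round_to_zero      -> 1.0
 *
 * with float_flag_inexact raised in each case.
 */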
2987 
2988 float16 float16_round_to_int(float16 a, float_status *s)
2989 {
2990     FloatParts64 p;
2991 
2992     float16_unpack_canonical(&p, a, s);
2993     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2994     return float16_round_pack_canonical(&p, s);
2995 }
2996 
2997 float32 float32_round_to_int(float32 a, float_status *s)
2998 {
2999     FloatParts64 p;
3000 
3001     float32_unpack_canonical(&p, a, s);
3002     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
3003     return float32_round_pack_canonical(&p, s);
3004 }
3005 
3006 float64 float64_round_to_int(float64 a, float_status *s)
3007 {
3008     FloatParts64 p;
3009 
3010     float64_unpack_canonical(&p, a, s);
3011     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
3012     return float64_round_pack_canonical(&p, s);
3013 }
3014 
3015 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
3016 {
3017     FloatParts64 p;
3018 
3019     bfloat16_unpack_canonical(&p, a, s);
3020     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
3021     return bfloat16_round_pack_canonical(&p, s);
3022 }
3023 
3024 float128 float128_round_to_int(float128 a, float_status *s)
3025 {
3026     FloatParts128 p;
3027 
3028     float128_unpack_canonical(&p, a, s);
3029     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3030     return float128_round_pack_canonical(&p, s);
3031 }
3032 
3033 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3034 {
3035     FloatParts128 p;
3036 
3037     if (!floatx80_unpack_canonical(&p, a, status)) {
3038         return floatx80_default_nan(status);
3039     }
3040 
3041     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3042                        &floatx80_params[status->floatx80_rounding_precision]);
3043     return floatx80_round_pack_canonical(&p, status);
3044 }
3045 
3046 /*
3047  * Floating-point to signed integer conversions
3048  */
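/*
 * Editor's usage sketch (hypothetical helper, scale semantics assumed
 * from the parts_* implementation): 'scale' multiplies the input by
 * 2**scale before rounding, which is how targets implement
 * float-to-fixed-point conversions, e.g. a Q24.8 value from a float64:
 */
static inline int32_t example_float64_to_q24_8(float64 a, float_status *st)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 8, st);
}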
3049 
3050 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3051                               float_status *s)
3052 {
3053     FloatParts64 p;
3054 
3055     float16_unpack_canonical(&p, a, s);
3056     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3057 }
3058 
3059 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3060                                 float_status *s)
3061 {
3062     FloatParts64 p;
3063 
3064     float16_unpack_canonical(&p, a, s);
3065     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3066 }
3067 
3068 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3069                                 float_status *s)
3070 {
3071     FloatParts64 p;
3072 
3073     float16_unpack_canonical(&p, a, s);
3074     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3075 }
3076 
3077 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3078                                 float_status *s)
3079 {
3080     FloatParts64 p;
3081 
3082     float16_unpack_canonical(&p, a, s);
3083     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3084 }
3085 
3086 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3087                                 float_status *s)
3088 {
3089     FloatParts64 p;
3090 
3091     float32_unpack_canonical(&p, a, s);
3092     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3093 }
3094 
3095 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3096                                 float_status *s)
3097 {
3098     FloatParts64 p;
3099 
3100     float32_unpack_canonical(&p, a, s);
3101     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3102 }
3103 
3104 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3105                                 float_status *s)
3106 {
3107     FloatParts64 p;
3108 
3109     float32_unpack_canonical(&p, a, s);
3110     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3111 }
3112 
3113 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3114                                 float_status *s)
3115 {
3116     FloatParts64 p;
3117 
3118     float64_unpack_canonical(&p, a, s);
3119     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3120 }
3121 
3122 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3123                                 float_status *s)
3124 {
3125     FloatParts64 p;
3126 
3127     float64_unpack_canonical(&p, a, s);
3128     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3129 }
3130 
3131 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3132                                 float_status *s)
3133 {
3134     FloatParts64 p;
3135 
3136     float64_unpack_canonical(&p, a, s);
3137     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3138 }
3139 
3140 int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3141                                float_status *s)
3142 {
3143     FloatParts64 p;
3144 
3145     bfloat16_unpack_canonical(&p, a, s);
3146     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3147 }
3148 
3149 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3150                                  float_status *s)
3151 {
3152     FloatParts64 p;
3153 
3154     bfloat16_unpack_canonical(&p, a, s);
3155     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3156 }
3157 
3158 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3159                                  float_status *s)
3160 {
3161     FloatParts64 p;
3162 
3163     bfloat16_unpack_canonical(&p, a, s);
3164     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3165 }
3166 
3167 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3168                                  float_status *s)
3169 {
3170     FloatParts64 p;
3171 
3172     bfloat16_unpack_canonical(&p, a, s);
3173     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3174 }
3175 
3176 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3177                                         int scale, float_status *s)
3178 {
3179     FloatParts128 p;
3180 
3181     float128_unpack_canonical(&p, a, s);
3182     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3183 }
3184 
3185 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3186                                         int scale, float_status *s)
3187 {
3188     FloatParts128 p;
3189 
3190     float128_unpack_canonical(&p, a, s);
3191     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3192 }
3193 
3194 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3195                                         int scale, float_status *s)
3196 {
3197     int flags = 0;
3198     Int128 r;
3199     FloatParts128 p;
3200 
3201     float128_unpack_canonical(&p, a, s);
3202 
3203     switch (p.cls) {
3204     case float_class_snan:
3205         flags |= float_flag_invalid_snan;
3206         /* fall through */
3207     case float_class_qnan:
3208         flags |= float_flag_invalid;
3209         r = UINT128_MAX;
3210         break;
3211 
3212     case float_class_inf:
3213         flags = float_flag_invalid | float_flag_invalid_cvti;
3214         r = p.sign ? INT128_MIN : INT128_MAX;
3215         break;
3216 
3217     case float_class_zero:
3218         return int128_zero();
3219 
3220     case float_class_normal:
3221         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3222             flags = float_flag_inexact;
3223         }
3224 
3225         if (p.exp < 127) {
3226             int shift = 127 - p.exp;
3227             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3228             if (p.sign) {
3229                 r = int128_neg(r);
3230             }
3231         } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3232                    p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3233             r = INT128_MIN;
3234         } else {
3235             flags = float_flag_invalid | float_flag_invalid_cvti;
3236             r = p.sign ? INT128_MIN : INT128_MAX;
3237         }
3238         break;
3239 
3240     default:
3241         g_assert_not_reached();
3242     }
3243 
3244     float_raise(flags, s);
3245     return r;
3246 }
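/*
 * Editor's note: the special case above (exp == 127, sign set, fraction
 * equal to just the implicit bit) is exactly -2**127: the one value
 * whose magnitude exceeds INT128_MAX yet still fits in a signed Int128.
 */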
3247 
3248 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3249                                         int scale, float_status *s)
3250 {
3251     FloatParts128 p;
3252 
3253     if (!floatx80_unpack_canonical(&p, a, s)) {
3254         parts_default_nan(&p, s);
3255     }
3256     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3257 }
3258 
3259 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3260                                         int scale, float_status *s)
3261 {
3262     FloatParts128 p;
3263 
3264     if (!floatx80_unpack_canonical(&p, a, s)) {
3265         parts_default_nan(&p, s);
3266     }
3267     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3268 }
3269 
3270 int8_t float16_to_int8(float16 a, float_status *s)
3271 {
3272     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3273 }
3274 
3275 int16_t float16_to_int16(float16 a, float_status *s)
3276 {
3277     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3278 }
3279 
3280 int32_t float16_to_int32(float16 a, float_status *s)
3281 {
3282     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3283 }
3284 
3285 int64_t float16_to_int64(float16 a, float_status *s)
3286 {
3287     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3288 }
3289 
3290 int16_t float32_to_int16(float32 a, float_status *s)
3291 {
3292     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3293 }
3294 
3295 int32_t float32_to_int32(float32 a, float_status *s)
3296 {
3297     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3298 }
3299 
3300 int64_t float32_to_int64(float32 a, float_status *s)
3301 {
3302     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3303 }
3304 
3305 int16_t float64_to_int16(float64 a, float_status *s)
3306 {
3307     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3308 }
3309 
3310 int32_t float64_to_int32(float64 a, float_status *s)
3311 {
3312     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3313 }
3314 
3315 int64_t float64_to_int64(float64 a, float_status *s)
3316 {
3317     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3318 }
3319 
3320 int32_t float128_to_int32(float128 a, float_status *s)
3321 {
3322     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3323 }
3324 
3325 int64_t float128_to_int64(float128 a, float_status *s)
3326 {
3327     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3328 }
3329 
3330 Int128 float128_to_int128(float128 a, float_status *s)
3331 {
3332     return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3333 }
3334 
3335 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3336 {
3337     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3338 }
3339 
3340 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3341 {
3342     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3343 }
3344 
3345 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3346 {
3347     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3348 }
3349 
3350 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3351 {
3352     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3353 }
3354 
3355 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3356 {
3357     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3358 }
3359 
3360 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3361 {
3362     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3363 }
3364 
3365 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3366 {
3367     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3368 }
3369 
3370 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3371 {
3372     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3373 }
3374 
3375 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3376 {
3377     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3378 }
3379 
3380 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3381 {
3382     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3383 }
3384 
3385 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3386 {
3387     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3388 }
3389 
3390 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3391 {
3392     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3393 }
3394 
3395 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3396 {
3397     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3398 }
3399 
3400 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3401 {
3402     return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3403 }
3404 
3405 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3406 {
3407     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3408 }
3409 
3410 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3411 {
3412     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3413 }
3414 
3415 int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
3416 {
3417     return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3418 }
3419 
3420 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3421 {
3422     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3423 }
3424 
3425 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3426 {
3427     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3428 }
3429 
3430 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3431 {
3432     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3433 }
3434 
3435 int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
3436 {
3437     return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
3438 }
3439 
3440 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3441 {
3442     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3443 }
3444 
3445 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3446 {
3447     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3448 }
3449 
3450 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3451 {
3452     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3453 }
3454 
3455 int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
3456                                 float_status *s)
3457 {
3458     FloatParts64 p;
3459 
3460     float64_unpack_canonical(&p, a, s);
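         /* 31 here (and 63 below) is the result width less the sign bit. */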
3461     return parts_float_to_sint_modulo(&p, rmode, 31, s);
3462 }
3463 
3464 int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
3465                                 float_status *s)
3466 {
3467     FloatParts64 p;
3468 
3469     float64_unpack_canonical(&p, a, s);
3470     return parts_float_to_sint_modulo(&p, rmode, 63, s);
3471 }
3472 
3473 /*
3474  * Floating-point to unsigned integer conversions
3475  */
3476 
3477 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3478                                 float_status *s)
3479 {
3480     FloatParts64 p;
3481 
3482     float16_unpack_canonical(&p, a, s);
3483     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3484 }
3485 
3486 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3487                                   float_status *s)
3488 {
3489     FloatParts64 p;
3490 
3491     float16_unpack_canonical(&p, a, s);
3492     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3493 }
3494 
3495 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3496                                   float_status *s)
3497 {
3498     FloatParts64 p;
3499 
3500     float16_unpack_canonical(&p, a, s);
3501     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3502 }
3503 
3504 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3505                                   float_status *s)
3506 {
3507     FloatParts64 p;
3508 
3509     float16_unpack_canonical(&p, a, s);
3510     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3511 }
3512 
3513 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3514                                   float_status *s)
3515 {
3516     FloatParts64 p;
3517 
3518     float32_unpack_canonical(&p, a, s);
3519     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3520 }
3521 
3522 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3523                                   float_status *s)
3524 {
3525     FloatParts64 p;
3526 
3527     float32_unpack_canonical(&p, a, s);
3528     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3529 }
3530 
3531 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3532                                   float_status *s)
3533 {
3534     FloatParts64 p;
3535 
3536     float32_unpack_canonical(&p, a, s);
3537     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3538 }
3539 
3540 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3541                                   float_status *s)
3542 {
3543     FloatParts64 p;
3544 
3545     float64_unpack_canonical(&p, a, s);
3546     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3547 }
3548 
3549 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3550                                   float_status *s)
3551 {
3552     FloatParts64 p;
3553 
3554     float64_unpack_canonical(&p, a, s);
3555     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3556 }
3557 
3558 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3559                                   float_status *s)
3560 {
3561     FloatParts64 p;
3562 
3563     float64_unpack_canonical(&p, a, s);
3564     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3565 }
3566 
3567 uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
3568                                  int scale, float_status *s)
3569 {
3570     FloatParts64 p;
3571 
3572     bfloat16_unpack_canonical(&p, a, s);
3573     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3574 }
3575 
3576 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3577                                    int scale, float_status *s)
3578 {
3579     FloatParts64 p;
3580 
3581     bfloat16_unpack_canonical(&p, a, s);
3582     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3583 }
3584 
3585 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3586                                    int scale, float_status *s)
3587 {
3588     FloatParts64 p;
3589 
3590     bfloat16_unpack_canonical(&p, a, s);
3591     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3592 }
3593 
3594 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3595                                    int scale, float_status *s)
3596 {
3597     FloatParts64 p;
3598 
3599     bfloat16_unpack_canonical(&p, a, s);
3600     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3601 }
3602 
3603 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3604                                           int scale, float_status *s)
3605 {
3606     FloatParts128 p;
3607 
3608     float128_unpack_canonical(&p, a, s);
3609     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3610 }
3611 
3612 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3613                                           int scale, float_status *s)
3614 {
3615     FloatParts128 p;
3616 
3617     float128_unpack_canonical(&p, a, s);
3618     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3619 }
3620 
3621 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3622                                          int scale, float_status *s)
3623 {
3624     int flags = 0;
3625     Int128 r;
3626     FloatParts128 p;
3627 
3628     float128_unpack_canonical(&p, a, s);
3629 
3630     switch (p.cls) {
3631     case float_class_snan:
3632         flags |= float_flag_invalid_snan;
3633         /* fall through */
3634     case float_class_qnan:
3635         flags |= float_flag_invalid;
3636         r = UINT128_MAX;
3637         break;
3638 
3639     case float_class_inf:
3640         flags = float_flag_invalid | float_flag_invalid_cvti;
3641         r = p.sign ? int128_zero() : UINT128_MAX;
3642         break;
3643 
3644     case float_class_zero:
3645         return int128_zero();
3646 
3647     case float_class_normal:
3648         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3649             flags = float_flag_inexact;
3650             if (p.cls == float_class_zero) {
3651                 r = int128_zero();
3652                 break;
3653             }
3654         }
3655 
3656         if (p.sign) {
3657             flags = float_flag_invalid | float_flag_invalid_cvti;
3658             r = int128_zero();
3659         } else if (p.exp <= 127) {
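             /*
              * Unlike the signed case above, exp == 127 is accepted:
              * shift is then 0 and the full 128-bit fraction is the result.
              */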
3660             int shift = 127 - p.exp;
3661             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3662         } else {
3663             flags = float_flag_invalid | float_flag_invalid_cvti;
3664             r = UINT128_MAX;
3665         }
3666         break;
3667 
3668     default:
3669         g_assert_not_reached();
3670     }
3671 
3672     float_raise(flags, s);
3673     return r;
3674 }
3675 
3676 uint8_t float16_to_uint8(float16 a, float_status *s)
3677 {
3678     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3679 }
3680 
3681 uint16_t float16_to_uint16(float16 a, float_status *s)
3682 {
3683     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3684 }
3685 
3686 uint32_t float16_to_uint32(float16 a, float_status *s)
3687 {
3688     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3689 }
3690 
3691 uint64_t float16_to_uint64(float16 a, float_status *s)
3692 {
3693     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3694 }
3695 
3696 uint16_t float32_to_uint16(float32 a, float_status *s)
3697 {
3698     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3699 }
3700 
3701 uint32_t float32_to_uint32(float32 a, float_status *s)
3702 {
3703     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3704 }
3705 
3706 uint64_t float32_to_uint64(float32 a, float_status *s)
3707 {
3708     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3709 }
3710 
3711 uint16_t float64_to_uint16(float64 a, float_status *s)
3712 {
3713     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3714 }
3715 
3716 uint32_t float64_to_uint32(float64 a, float_status *s)
3717 {
3718     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3719 }
3720 
3721 uint64_t float64_to_uint64(float64 a, float_status *s)
3722 {
3723     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3724 }
3725 
3726 uint32_t float128_to_uint32(float128 a, float_status *s)
3727 {
3728     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3729 }
3730 
3731 uint64_t float128_to_uint64(float128 a, float_status *s)
3732 {
3733     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3734 }
3735 
3736 Int128 float128_to_uint128(float128 a, float_status *s)
3737 {
3738     return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3739 }
3740 
3741 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3742 {
3743     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3744 }
3745 
3746 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3747 {
3748     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3749 }
3750 
3751 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3752 {
3753     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3754 }
3755 
3756 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3757 {
3758     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3759 }
3760 
3761 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3762 {
3763     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3764 }
3765 
3766 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3767 {
3768     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3769 }
3770 
3771 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3772 {
3773     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3774 }
3775 
3776 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3777 {
3778     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3779 }
3780 
3781 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3782 {
3783     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3784 }
3785 
3786 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3787 {
3788     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3789 }
3790 
3791 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3792 {
3793     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3794 }
3795 
3796 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3797 {
3798     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3799 }
3800 
3801 uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
3802 {
3803     return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3804 }
3805 
3806 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3807 {
3808     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3809 }
3810 
3811 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3812 {
3813     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3814 }
3815 
3816 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3817 {
3818     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3819 }
3820 
3821 uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
3822 {
3823     return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
3824 }
3825 
3826 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3827 {
3828     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3829 }
3830 
3831 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3832 {
3833     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3834 }
3835 
3836 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3837 {
3838     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3839 }
3840 
3841 /*
3842  * Signed integer to floating-point conversions
3843  */
3844 
3845 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3846 {
3847     FloatParts64 p;
3848 
3849     parts_sint_to_float(&p, a, scale, status);
3850     return float16_round_pack_canonical(&p, status);
3851 }
3852 
3853 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3854 {
3855     return int64_to_float16_scalbn(a, scale, status);
3856 }
3857 
3858 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3859 {
3860     return int64_to_float16_scalbn(a, scale, status);
3861 }
3862 
3863 float16 int64_to_float16(int64_t a, float_status *status)
3864 {
3865     return int64_to_float16_scalbn(a, 0, status);
3866 }
3867 
3868 float16 int32_to_float16(int32_t a, float_status *status)
3869 {
3870     return int64_to_float16_scalbn(a, 0, status);
3871 }
3872 
3873 float16 int16_to_float16(int16_t a, float_status *status)
3874 {
3875     return int64_to_float16_scalbn(a, 0, status);
3876 }
3877 
3878 float16 int8_to_float16(int8_t a, float_status *status)
3879 {
3880     return int64_to_float16_scalbn(a, 0, status);
3881 }
3882 
3883 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3884 {
3885     FloatParts64 p;
3886 
3887     /* Without scaling, there are no overflow concerns. */
3888     if (likely(scale == 0) && can_use_fpu(status)) {
3889         union_float32 ur;
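             /*
              * ur.h has the host's float type, so this assignment lets
              * the host FPU perform the conversion; can_use_fpu() has
              * already checked that the result will match the soft path.
              */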
3890         ur.h = a;
3891         return ur.s;
3892     }
3893 
3894     parts_sint_to_float(&p, a, scale, status);
3895     return float32_round_pack_canonical(&p, status);
3896 }
3897 
3898 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3899 {
3900     return int64_to_float32_scalbn(a, scale, status);
3901 }
3902 
3903 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3904 {
3905     return int64_to_float32_scalbn(a, scale, status);
3906 }
3907 
3908 float32 int64_to_float32(int64_t a, float_status *status)
3909 {
3910     return int64_to_float32_scalbn(a, 0, status);
3911 }
3912 
3913 float32 int32_to_float32(int32_t a, float_status *status)
3914 {
3915     return int64_to_float32_scalbn(a, 0, status);
3916 }
3917 
3918 float32 int16_to_float32(int16_t a, float_status *status)
3919 {
3920     return int64_to_float32_scalbn(a, 0, status);
3921 }
3922 
3923 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3924 {
3925     FloatParts64 p;
3926 
3927     /* Without scaling, there are no overflow concerns. */
3928     if (likely(scale == 0) && can_use_fpu(status)) {
3929         union_float64 ur;
3930         ur.h = a;
3931         return ur.s;
3932     }
3933 
3934     parts_sint_to_float(&p, a, scale, status);
3935     return float64_round_pack_canonical(&p, status);
3936 }
3937 
3938 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3939 {
3940     return int64_to_float64_scalbn(a, scale, status);
3941 }
3942 
3943 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3944 {
3945     return int64_to_float64_scalbn(a, scale, status);
3946 }
3947 
3948 float64 int64_to_float64(int64_t a, float_status *status)
3949 {
3950     return int64_to_float64_scalbn(a, 0, status);
3951 }
3952 
3953 float64 int32_to_float64(int32_t a, float_status *status)
3954 {
3955     return int64_to_float64_scalbn(a, 0, status);
3956 }
3957 
3958 float64 int16_to_float64(int16_t a, float_status *status)
3959 {
3960     return int64_to_float64_scalbn(a, 0, status);
3961 }
3962 
3963 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3964 {
3965     FloatParts64 p;
3966 
3967     parts_sint_to_float(&p, a, scale, status);
3968     return bfloat16_round_pack_canonical(&p, status);
3969 }
3970 
3971 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3972 {
3973     return int64_to_bfloat16_scalbn(a, scale, status);
3974 }
3975 
3976 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3977 {
3978     return int64_to_bfloat16_scalbn(a, scale, status);
3979 }
3980 
3981 bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
3982 {
3983     return int64_to_bfloat16_scalbn(a, scale, status);
3984 }
3985 
3986 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3987 {
3988     return int64_to_bfloat16_scalbn(a, 0, status);
3989 }
3990 
3991 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3992 {
3993     return int64_to_bfloat16_scalbn(a, 0, status);
3994 }
3995 
3996 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3997 {
3998     return int64_to_bfloat16_scalbn(a, 0, status);
3999 }
4000 
4001 bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
4002 {
4003     return int64_to_bfloat16_scalbn(a, 0, status);
4004 }
4005 
4006 float128 int128_to_float128(Int128 a, float_status *status)
4007 {
4008     FloatParts128 p = { };
4009     int shift;
4010 
4011     if (int128_nz(a)) {
4012         p.cls = float_class_normal;
4013         if (!int128_nonneg(a)) {
4014             p.sign = true;
4015             a = int128_neg(a);
4016         }
4017 
4018         shift = clz64(int128_gethi(a));
4019         if (shift == 64) {
4020             shift += clz64(int128_getlo(a));
4021         }
4022 
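             /*
              * shift now counts the leading zeros of the whole 128-bit
              * magnitude.  After the left shift the most significant set
              * bit lands in frac_hi bit 63 (the decomposed implicit bit),
              * and a value whose top bit was at position 127 - shift has
              * exponent 127 - shift.
              */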
4023         p.exp = 127 - shift;
4024         a = int128_lshift(a, shift);
4025 
4026         p.frac_hi = int128_gethi(a);
4027         p.frac_lo = int128_getlo(a);
4028     } else {
4029         p.cls = float_class_zero;
4030     }
4031 
4032     return float128_round_pack_canonical(&p, status);
4033 }
4034 
4035 float128 int64_to_float128(int64_t a, float_status *status)
4036 {
4037     FloatParts128 p;
4038 
4039     parts_sint_to_float(&p, a, 0, status);
4040     return float128_round_pack_canonical(&p, status);
4041 }
4042 
4043 float128 int32_to_float128(int32_t a, float_status *status)
4044 {
4045     return int64_to_float128(a, status);
4046 }
4047 
4048 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4049 {
4050     FloatParts128 p;
4051 
4052     parts_sint_to_float(&p, a, 0, status);
4053     return floatx80_round_pack_canonical(&p, status);
4054 }
4055 
4056 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4057 {
4058     return int64_to_floatx80(a, status);
4059 }
4060 
4061 /*
4062  * Unsigned integer to floating-point conversions
4063  */
4064 
4065 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
4066 {
4067     FloatParts64 p;
4068 
4069     parts_uint_to_float(&p, a, scale, status);
4070     return float16_round_pack_canonical(&p, status);
4071 }
4072 
4073 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
4074 {
4075     return uint64_to_float16_scalbn(a, scale, status);
4076 }
4077 
4078 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
4079 {
4080     return uint64_to_float16_scalbn(a, scale, status);
4081 }
4082 
4083 float16 uint64_to_float16(uint64_t a, float_status *status)
4084 {
4085     return uint64_to_float16_scalbn(a, 0, status);
4086 }
4087 
4088 float16 uint32_to_float16(uint32_t a, float_status *status)
4089 {
4090     return uint64_to_float16_scalbn(a, 0, status);
4091 }
4092 
4093 float16 uint16_to_float16(uint16_t a, float_status *status)
4094 {
4095     return uint64_to_float16_scalbn(a, 0, status);
4096 }
4097 
4098 float16 uint8_to_float16(uint8_t a, float_status *status)
4099 {
4100     return uint64_to_float16_scalbn(a, 0, status);
4101 }
4102 
4103 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4104 {
4105     FloatParts64 p;
4106 
4107     /* Without scaling, there are no overflow concerns. */
4108     if (likely(scale == 0) && can_use_fpu(status)) {
4109         union_float32 ur;
4110         ur.h = a;
4111         return ur.s;
4112     }
4113 
4114     parts_uint_to_float(&p, a, scale, status);
4115     return float32_round_pack_canonical(&p, status);
4116 }
4117 
4118 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4119 {
4120     return uint64_to_float32_scalbn(a, scale, status);
4121 }
4122 
4123 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4124 {
4125     return uint64_to_float32_scalbn(a, scale, status);
4126 }
4127 
4128 float32 uint64_to_float32(uint64_t a, float_status *status)
4129 {
4130     return uint64_to_float32_scalbn(a, 0, status);
4131 }
4132 
4133 float32 uint32_to_float32(uint32_t a, float_status *status)
4134 {
4135     return uint64_to_float32_scalbn(a, 0, status);
4136 }
4137 
4138 float32 uint16_to_float32(uint16_t a, float_status *status)
4139 {
4140     return uint64_to_float32_scalbn(a, 0, status);
4141 }
4142 
4143 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4144 {
4145     FloatParts64 p;
4146 
4147     /* Without scaling, there are no overflow concerns. */
4148     if (likely(scale == 0) && can_use_fpu(status)) {
4149         union_float64 ur;
4150         ur.h = a;
4151         return ur.s;
4152     }
4153 
4154     parts_uint_to_float(&p, a, scale, status);
4155     return float64_round_pack_canonical(&p, status);
4156 }
4157 
4158 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4159 {
4160     return uint64_to_float64_scalbn(a, scale, status);
4161 }
4162 
4163 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4164 {
4165     return uint64_to_float64_scalbn(a, scale, status);
4166 }
4167 
4168 float64 uint64_to_float64(uint64_t a, float_status *status)
4169 {
4170     return uint64_to_float64_scalbn(a, 0, status);
4171 }
4172 
4173 float64 uint32_to_float64(uint32_t a, float_status *status)
4174 {
4175     return uint64_to_float64_scalbn(a, 0, status);
4176 }
4177 
4178 float64 uint16_to_float64(uint16_t a, float_status *status)
4179 {
4180     return uint64_to_float64_scalbn(a, 0, status);
4181 }
4182 
4183 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4184 {
4185     FloatParts64 p;
4186 
4187     parts_uint_to_float(&p, a, scale, status);
4188     return bfloat16_round_pack_canonical(&p, status);
4189 }
4190 
4191 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4192 {
4193     return uint64_to_bfloat16_scalbn(a, scale, status);
4194 }
4195 
4196 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4197 {
4198     return uint64_to_bfloat16_scalbn(a, scale, status);
4199 }
4200 
4201 bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
4202 {
4203     return uint64_to_bfloat16_scalbn(a, scale, status);
4204 }
4205 
4206 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4207 {
4208     return uint64_to_bfloat16_scalbn(a, 0, status);
4209 }
4210 
4211 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4212 {
4213     return uint64_to_bfloat16_scalbn(a, 0, status);
4214 }
4215 
4216 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4217 {
4218     return uint64_to_bfloat16_scalbn(a, 0, status);
4219 }
4220 
4221 bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
4222 {
4223     return uint64_to_bfloat16_scalbn(a, 0, status);
4224 }
4225 
4226 float128 uint64_to_float128(uint64_t a, float_status *status)
4227 {
4228     FloatParts128 p;
4229 
4230     parts_uint_to_float(&p, a, 0, status);
4231     return float128_round_pack_canonical(&p, status);
4232 }
4233 
4234 float128 uint128_to_float128(Int128 a, float_status *status)
4235 {
4236     FloatParts128 p = { };
4237     int shift;
4238 
4239     if (int128_nz(a)) {
4240         p.cls = float_class_normal;
4241 
4242         shift = clz64(int128_gethi(a));
4243         if (shift == 64) {
4244             shift += clz64(int128_getlo(a));
4245         }
4246 
4247         p.exp = 127 - shift;
4248         a = int128_lshift(a, shift);
4249 
4250         p.frac_hi = int128_gethi(a);
4251         p.frac_lo = int128_getlo(a);
4252     } else {
4253         p.cls = float_class_zero;
4254     }
4255 
4256     return float128_round_pack_canonical(&p, status);
4257 }
4258 
4259 /*
4260  * Minimum and maximum
4261  */
4262 
4263 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4264 {
4265     FloatParts64 pa, pb, *pr;
4266 
4267     float16_unpack_canonical(&pa, a, s);
4268     float16_unpack_canonical(&pb, b, s);
4269     pr = parts_minmax(&pa, &pb, s, flags);
4270 
4271     return float16_round_pack_canonical(pr, s);
4272 }
4273 
4274 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4275                                 float_status *s, int flags)
4276 {
4277     FloatParts64 pa, pb, *pr;
4278 
4279     bfloat16_unpack_canonical(&pa, a, s);
4280     bfloat16_unpack_canonical(&pb, b, s);
4281     pr = parts_minmax(&pa, &pb, s, flags);
4282 
4283     return bfloat16_round_pack_canonical(pr, s);
4284 }
4285 
4286 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4287 {
4288     FloatParts64 pa, pb, *pr;
4289 
4290     float32_unpack_canonical(&pa, a, s);
4291     float32_unpack_canonical(&pb, b, s);
4292     pr = parts_minmax(&pa, &pb, s, flags);
4293 
4294     return float32_round_pack_canonical(pr, s);
4295 }
4296 
4297 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4298 {
4299     FloatParts64 pa, pb, *pr;
4300 
4301     float64_unpack_canonical(&pa, a, s);
4302     float64_unpack_canonical(&pb, b, s);
4303     pr = parts_minmax(&pa, &pb, s, flags);
4304 
4305     return float64_round_pack_canonical(pr, s);
4306 }
4307 
4308 static float128 float128_minmax(float128 a, float128 b,
4309                                 float_status *s, int flags)
4310 {
4311     FloatParts128 pa, pb, *pr;
4312 
4313     float128_unpack_canonical(&pa, a, s);
4314     float128_unpack_canonical(&pb, b, s);
4315     pr = parts_minmax(&pa, &pb, s, flags);
4316 
4317     return float128_round_pack_canonical(pr, s);
4318 }
4319 
4320 #define MINMAX_1(type, name, flags) \
4321     type type##_##name(type a, type b, float_status *s) \
4322     { return type##_minmax(a, b, s, flags); }
4323 
4324 #define MINMAX_2(type) \
4325     MINMAX_1(type, max, 0)                                                \
4326     MINMAX_1(type, maxnum, minmax_isnum)                                  \
4327     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4328     MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4329     MINMAX_1(type, min, minmax_ismin)                                     \
4330     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4331     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4332     MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4333 
4334 MINMAX_2(float16)
4335 MINMAX_2(bfloat16)
4336 MINMAX_2(float32)
4337 MINMAX_2(float64)
4338 MINMAX_2(float128)
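
     /*
      * Each MINMAX_1 instance expands to a one-line wrapper, e.g.:
      *
      *   float32 float32_minnum(float32 a, float32 b, float_status *s)
      *   { return float32_minmax(a, b, s, minmax_ismin | minmax_isnum); }
      */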
4339 
4340 #undef MINMAX_1
4341 #undef MINMAX_2
4342 
4343 /*
4344  * Floating point compare
4345  */
4346 
4347 static FloatRelation QEMU_FLATTEN
4348 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4349 {
4350     FloatParts64 pa, pb;
4351 
4352     float16_unpack_canonical(&pa, a, s);
4353     float16_unpack_canonical(&pb, b, s);
4354     return parts_compare(&pa, &pb, s, is_quiet);
4355 }
4356 
4357 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4358 {
4359     return float16_do_compare(a, b, s, false);
4360 }
4361 
4362 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4363 {
4364     return float16_do_compare(a, b, s, true);
4365 }
4366 
4367 static FloatRelation QEMU_SOFTFLOAT_ATTR
4368 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4369 {
4370     FloatParts64 pa, pb;
4371 
4372     float32_unpack_canonical(&pa, a, s);
4373     float32_unpack_canonical(&pb, b, s);
4374     return parts_compare(&pa, &pb, s, is_quiet);
4375 }
4376 
4377 static FloatRelation QEMU_FLATTEN
4378 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4379 {
4380     union_float32 ua, ub;
4381 
4382     ua.s = xa;
4383     ub.s = xb;
4384 
4385     if (QEMU_NO_HARDFLOAT) {
4386         goto soft;
4387     }
4388 
4389     float32_input_flush2(&ua.s, &ub.s, s);
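         /*
          * isgreater(), isgreaterequal() and isless() are the quiet C99
          * comparison macros from <math.h>; they do not trap on quiet
          * NaNs, so NaN operands fail all three tests and fall through
          * to the soft path, which raises the appropriate flags.
          */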
4390     if (isgreaterequal(ua.h, ub.h)) {
4391         if (isgreater(ua.h, ub.h)) {
4392             return float_relation_greater;
4393         }
4394         return float_relation_equal;
4395     }
4396     if (likely(isless(ua.h, ub.h))) {
4397         return float_relation_less;
4398     }
4399     /*
4400      * The only condition remaining is unordered.
4401      * Fall through to set flags.
4402      */
4403  soft:
4404     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4405 }
4406 
4407 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4408 {
4409     return float32_hs_compare(a, b, s, false);
4410 }
4411 
4412 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4413 {
4414     return float32_hs_compare(a, b, s, true);
4415 }
4416 
4417 static FloatRelation QEMU_SOFTFLOAT_ATTR
4418 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4419 {
4420     FloatParts64 pa, pb;
4421 
4422     float64_unpack_canonical(&pa, a, s);
4423     float64_unpack_canonical(&pb, b, s);
4424     return parts_compare(&pa, &pb, s, is_quiet);
4425 }
4426 
4427 static FloatRelation QEMU_FLATTEN
4428 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4429 {
4430     union_float64 ua, ub;
4431 
4432     ua.s = xa;
4433     ub.s = xb;
4434 
4435     if (QEMU_NO_HARDFLOAT) {
4436         goto soft;
4437     }
4438 
4439     float64_input_flush2(&ua.s, &ub.s, s);
4440     if (isgreaterequal(ua.h, ub.h)) {
4441         if (isgreater(ua.h, ub.h)) {
4442             return float_relation_greater;
4443         }
4444         return float_relation_equal;
4445     }
4446     if (likely(isless(ua.h, ub.h))) {
4447         return float_relation_less;
4448     }
4449     /*
4450      * The only condition remaining is unordered.
4451      * Fall through to set flags.
4452      */
4453  soft:
4454     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4455 }
4456 
4457 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4458 {
4459     return float64_hs_compare(a, b, s, false);
4460 }
4461 
4462 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4463 {
4464     return float64_hs_compare(a, b, s, true);
4465 }
4466 
4467 static FloatRelation QEMU_FLATTEN
4468 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4469 {
4470     FloatParts64 pa, pb;
4471 
4472     bfloat16_unpack_canonical(&pa, a, s);
4473     bfloat16_unpack_canonical(&pb, b, s);
4474     return parts_compare(&pa, &pb, s, is_quiet);
4475 }
4476 
4477 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4478 {
4479     return bfloat16_do_compare(a, b, s, false);
4480 }
4481 
4482 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4483 {
4484     return bfloat16_do_compare(a, b, s, true);
4485 }
4486 
4487 static FloatRelation QEMU_FLATTEN
4488 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4489 {
4490     FloatParts128 pa, pb;
4491 
4492     float128_unpack_canonical(&pa, a, s);
4493     float128_unpack_canonical(&pb, b, s);
4494     return parts_compare(&pa, &pb, s, is_quiet);
4495 }
4496 
4497 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4498 {
4499     return float128_do_compare(a, b, s, false);
4500 }
4501 
4502 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4503 {
4504     return float128_do_compare(a, b, s, true);
4505 }
4506 
4507 static FloatRelation QEMU_FLATTEN
4508 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4509 {
4510     FloatParts128 pa, pb;
4511 
4512     if (!floatx80_unpack_canonical(&pa, a, s) ||
4513         !floatx80_unpack_canonical(&pb, b, s)) {
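             /* Invalid encodings compare as unordered. */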
4514         return float_relation_unordered;
4515     }
4516     return parts_compare(&pa, &pb, s, is_quiet);
4517 }
4518 
4519 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4520 {
4521     return floatx80_do_compare(a, b, s, false);
4522 }
4523 
4524 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4525 {
4526     return floatx80_do_compare(a, b, s, true);
4527 }
4528 
4529 /*
4530  * Scale by 2**N
4531  */
4532 
4533 float16 float16_scalbn(float16 a, int n, float_status *status)
4534 {
4535     FloatParts64 p;
4536 
4537     float16_unpack_canonical(&p, a, status);
4538     parts_scalbn(&p, n, status);
4539     return float16_round_pack_canonical(&p, status);
4540 }
4541 
4542 float32 float32_scalbn(float32 a, int n, float_status *status)
4543 {
4544     FloatParts64 p;
4545 
4546     float32_unpack_canonical(&p, a, status);
4547     parts_scalbn(&p, n, status);
4548     return float32_round_pack_canonical(&p, status);
4549 }
4550 
4551 float64 float64_scalbn(float64 a, int n, float_status *status)
4552 {
4553     FloatParts64 p;
4554 
4555     float64_unpack_canonical(&p, a, status);
4556     parts_scalbn(&p, n, status);
4557     return float64_round_pack_canonical(&p, status);
4558 }
4559 
4560 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4561 {
4562     FloatParts64 p;
4563 
4564     bfloat16_unpack_canonical(&p, a, status);
4565     parts_scalbn(&p, n, status);
4566     return bfloat16_round_pack_canonical(&p, status);
4567 }
4568 
4569 float128 float128_scalbn(float128 a, int n, float_status *status)
4570 {
4571     FloatParts128 p;
4572 
4573     float128_unpack_canonical(&p, a, status);
4574     parts_scalbn(&p, n, status);
4575     return float128_round_pack_canonical(&p, status);
4576 }
4577 
4578 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4579 {
4580     FloatParts128 p;
4581 
4582     if (!floatx80_unpack_canonical(&p, a, status)) {
4583         return floatx80_default_nan(status);
4584     }
4585     parts_scalbn(&p, n, status);
4586     return floatx80_round_pack_canonical(&p, status);
4587 }
4588 
4589 /*
4590  * Square Root
4591  */
4592 
4593 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4594 {
4595     FloatParts64 p;
4596 
4597     float16_unpack_canonical(&p, a, status);
4598     parts_sqrt(&p, status, &float16_params);
4599     return float16_round_pack_canonical(&p, status);
4600 }
4601 
4602 static float32 QEMU_SOFTFLOAT_ATTR
4603 soft_f32_sqrt(float32 a, float_status *status)
4604 {
4605     FloatParts64 p;
4606 
4607     float32_unpack_canonical(&p, a, status);
4608     parts_sqrt(&p, status, &float32_params);
4609     return float32_round_pack_canonical(&p, status);
4610 }
4611 
4612 static float64 QEMU_SOFTFLOAT_ATTR
4613 soft_f64_sqrt(float64 a, float_status *status)
4614 {
4615     FloatParts64 p;
4616 
4617     float64_unpack_canonical(&p, a, status);
4618     parts_sqrt(&p, status, &float64_params);
4619     return float64_round_pack_canonical(&p, status);
4620 }
4621 
4622 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4623 {
4624     union_float32 ua, ur;
4625 
4626     ua.s = xa;
4627     if (unlikely(!can_use_fpu(s))) {
4628         goto soft;
4629     }
4630 
4631     float32_input_flush1(&ua.s, s);
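         /*
          * The host sqrtf() is used only for non-negative zero or
          * normal inputs, where it cannot produce a NaN and can raise
          * nothing beyond inexact; everything else takes the soft path.
          */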
4632     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4633         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4634                        fpclassify(ua.h) == FP_ZERO) ||
4635                      signbit(ua.h))) {
4636             goto soft;
4637         }
4638     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4639                         float32_is_neg(ua.s))) {
4640         goto soft;
4641     }
4642     ur.h = sqrtf(ua.h);
4643     return ur.s;
4644 
4645  soft:
4646     return soft_f32_sqrt(ua.s, s);
4647 }
4648 
4649 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4650 {
4651     union_float64 ua, ur;
4652 
4653     ua.s = xa;
4654     if (unlikely(!can_use_fpu(s))) {
4655         goto soft;
4656     }
4657 
4658     float64_input_flush1(&ua.s, s);
4659     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4660         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4661                        fpclassify(ua.h) == FP_ZERO) ||
4662                      signbit(ua.h))) {
4663             goto soft;
4664         }
4665     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4666                         float64_is_neg(ua.s))) {
4667         goto soft;
4668     }
4669     ur.h = sqrt(ua.h);
4670     return ur.s;
4671 
4672  soft:
4673     return soft_f64_sqrt(ua.s, s);
4674 }
4675 
4676 float64 float64r32_sqrt(float64 a, float_status *status)
4677 {
4678     FloatParts64 p;
4679 
4680     float64_unpack_canonical(&p, a, status);
4681     parts_sqrt(&p, status, &float64_params);
4682     return float64r32_round_pack_canonical(&p, status);
4683 }
4684 
4685 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4686 {
4687     FloatParts64 p;
4688 
4689     bfloat16_unpack_canonical(&p, a, status);
4690     parts_sqrt(&p, status, &bfloat16_params);
4691     return bfloat16_round_pack_canonical(&p, status);
4692 }
4693 
4694 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4695 {
4696     FloatParts128 p;
4697 
4698     float128_unpack_canonical(&p, a, status);
4699     parts_sqrt(&p, status, &float128_params);
4700     return float128_round_pack_canonical(&p, status);
4701 }
4702 
4703 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4704 {
4705     FloatParts128 p;
4706 
4707     if (!floatx80_unpack_canonical(&p, a, s)) {
4708         return floatx80_default_nan(s);
4709     }
4710     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4711     return floatx80_round_pack_canonical(&p, s);
4712 }
4713 
4714 /*
4715  * log2
4716  */

4717 float32 float32_log2(float32 a, float_status *status)
4718 {
4719     FloatParts64 p;
4720 
4721     float32_unpack_canonical(&p, a, status);
4722     parts_log2(&p, status, &float32_params);
4723     return float32_round_pack_canonical(&p, status);
4724 }
4725 
4726 float64 float64_log2(float64 a, float_status *status)
4727 {
4728     FloatParts64 p;
4729 
4730     float64_unpack_canonical(&p, a, status);
4731     parts_log2(&p, status, &float64_params);
4732     return float64_round_pack_canonical(&p, status);
4733 }
4734 
4735 /*----------------------------------------------------------------------------
4736 | The pattern for a default generated NaN.
4737 *----------------------------------------------------------------------------*/
4738 
4739 float16 float16_default_nan(float_status *status)
4740 {
4741     FloatParts64 p;
4742 
4743     parts_default_nan(&p, status);
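         /*
          * The parts are filled in the decomposed (left-justified)
          * position; shift the fraction back down into the raw field.
          */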
4744     p.frac >>= float16_params.frac_shift;
4745     return float16_pack_raw(&p);
4746 }
4747 
4748 float32 float32_default_nan(float_status *status)
4749 {
4750     FloatParts64 p;
4751 
4752     parts_default_nan(&p, status);
4753     p.frac >>= float32_params.frac_shift;
4754     return float32_pack_raw(&p);
4755 }
4756 
4757 float64 float64_default_nan(float_status *status)
4758 {
4759     FloatParts64 p;
4760 
4761     parts_default_nan(&p, status);
4762     p.frac >>= float64_params.frac_shift;
4763     return float64_pack_raw(&p);
4764 }
4765 
4766 float128 float128_default_nan(float_status *status)
4767 {
4768     FloatParts128 p;
4769 
4770     parts_default_nan(&p, status);
4771     frac_shr(&p, float128_params.frac_shift);
4772     return float128_pack_raw(&p);
4773 }
4774 
4775 bfloat16 bfloat16_default_nan(float_status *status)
4776 {
4777     FloatParts64 p;
4778 
4779     parts_default_nan(&p, status);
4780     p.frac >>= bfloat16_params.frac_shift;
4781     return bfloat16_pack_raw(&p);
4782 }
4783 
4784 /*----------------------------------------------------------------------------
4785 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4786 *----------------------------------------------------------------------------*/
4787 
4788 float16 float16_silence_nan(float16 a, float_status *status)
4789 {
4790     FloatParts64 p;
4791 
4792     float16_unpack_raw(&p, a);
4793     p.frac <<= float16_params.frac_shift;
4794     parts_silence_nan(&p, status);
4795     p.frac >>= float16_params.frac_shift;
4796     return float16_pack_raw(&p);
4797 }
4798 
4799 float32 float32_silence_nan(float32 a, float_status *status)
4800 {
4801     FloatParts64 p;
4802 
4803     float32_unpack_raw(&p, a);
4804     p.frac <<= float32_params.frac_shift;
4805     parts_silence_nan(&p, status);
4806     p.frac >>= float32_params.frac_shift;
4807     return float32_pack_raw(&p);
4808 }
4809 
4810 float64 float64_silence_nan(float64 a, float_status *status)
4811 {
4812     FloatParts64 p;
4813 
4814     float64_unpack_raw(&p, a);
4815     p.frac <<= float64_params.frac_shift;
4816     parts_silence_nan(&p, status);
4817     p.frac >>= float64_params.frac_shift;
4818     return float64_pack_raw(&p);
4819 }
4820 
4821 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4822 {
4823     FloatParts64 p;
4824 
4825     bfloat16_unpack_raw(&p, a);
4826     p.frac <<= bfloat16_params.frac_shift;
4827     parts_silence_nan(&p, status);
4828     p.frac >>= bfloat16_params.frac_shift;
4829     return bfloat16_pack_raw(&p);
4830 }
4831 
4832 float128 float128_silence_nan(float128 a, float_status *status)
4833 {
4834     FloatParts128 p;
4835 
4836     float128_unpack_raw(&p, a);
4837     frac_shl(&p, float128_params.frac_shift);
4838     parts_silence_nan(&p, status);
4839     frac_shr(&p, float128_params.frac_shift);
4840     return float128_pack_raw(&p);
4841 }
4842 
4843 /*----------------------------------------------------------------------------
4844 | If `a' is denormal and we are in flush-to-zero mode then set the
4845 | input-denormal exception and return zero. Otherwise just return the value.
4846 *----------------------------------------------------------------------------*/
4847 
4848 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4849 {
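         /*
          * p holds raw fields here (from *_unpack_raw), so a biased
          * exponent of zero with a nonzero fraction is a denormal.
          */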
4850     if (p.exp == 0 && p.frac != 0) {
4851         float_raise(float_flag_input_denormal, status);
4852         return true;
4853     }
4854 
4855     return false;
4856 }
4857 
4858 float16 float16_squash_input_denormal(float16 a, float_status *status)
4859 {
4860     if (status->flush_inputs_to_zero) {
4861         FloatParts64 p;
4862 
4863         float16_unpack_raw(&p, a);
4864         if (parts_squash_denormal(p, status)) {
4865             return float16_set_sign(float16_zero, p.sign);
4866         }
4867     }
4868     return a;
4869 }
4870 
4871 float32 float32_squash_input_denormal(float32 a, float_status *status)
4872 {
4873     if (status->flush_inputs_to_zero) {
4874         FloatParts64 p;
4875 
4876         float32_unpack_raw(&p, a);
4877         if (parts_squash_denormal(p, status)) {
4878             return float32_set_sign(float32_zero, p.sign);
4879         }
4880     }
4881     return a;
4882 }
4883 
4884 float64 float64_squash_input_denormal(float64 a, float_status *status)
4885 {
4886     if (status->flush_inputs_to_zero) {
4887         FloatParts64 p;
4888 
4889         float64_unpack_raw(&p, a);
4890         if (parts_squash_denormal(p, status)) {
4891             return float64_set_sign(float64_zero, p.sign);
4892         }
4893     }
4894     return a;
4895 }
4896 
4897 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4898 {
4899     if (status->flush_inputs_to_zero) {
4900         FloatParts64 p;
4901 
4902         bfloat16_unpack_raw(&p, a);
4903         if (parts_squash_denormal(p, status)) {
4904             return bfloat16_set_sign(bfloat16_zero, p.sign);
4905         }
4906     }
4907     return a;
4908 }
4909 
4910 /*----------------------------------------------------------------------------
4911 | Normalizes the subnormal extended double-precision floating-point value
4912 | represented by the denormalized significand `aSig'.  The normalized exponent
4913 | and significand are stored at the locations pointed to by `zExpPtr' and
4914 | `zSigPtr', respectively.
4915 *----------------------------------------------------------------------------*/
4916 
4917 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4918                                 uint64_t *zSigPtr)
4919 {
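         /*
          * For example, aSig == 1 yields shiftCount == 63, giving
          * *zSigPtr == 1 << 63 and *zExpPtr == 1 - 63 == -62.
          */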
4920     int8_t shiftCount;
4921 
4922     shiftCount = clz64(aSig);
4923     *zSigPtr = aSig<<shiftCount;
4924     *zExpPtr = 1 - shiftCount;
4925 }
4926 
4927 /*----------------------------------------------------------------------------
4928 | Takes two extended double-precision floating-point values `a' and `b', one
4929 | of which is a NaN, and returns the appropriate NaN result.  If either `a' or
4930 | `b' is a signaling NaN, the invalid exception is raised.
4931 *----------------------------------------------------------------------------*/
4932 
4933 floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status)
4934 {
4935     FloatParts128 pa, pb, *pr;
4936 
4937     if (!floatx80_unpack_canonical(&pa, a, status) ||
4938         !floatx80_unpack_canonical(&pb, b, status)) {
4939         return floatx80_default_nan(status);
4940     }
4941 
4942     pr = parts_pick_nan(&pa, &pb, status);
4943     return floatx80_round_pack_canonical(pr, status);
4944 }
4945 
4946 /*----------------------------------------------------------------------------
4947 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4948 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4949 | and returns the proper extended double-precision floating-point value
4950 | corresponding to the abstract input.  Ordinarily, the abstract value is
4951 | rounded and packed into the extended double-precision format, with the
4952 | inexact exception raised if the abstract input cannot be represented
4953 | exactly.  However, if the abstract value is too large, the overflow and
4954 | inexact exceptions are raised and an infinity or maximal finite value is
4955 | returned.  If the abstract value is too small, the input value is rounded to
4956 | a subnormal number, and the underflow and inexact exceptions are raised if
4957 | the abstract input cannot be represented exactly as a subnormal extended
4958 | double-precision floating-point number.
4959 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4960 | the result is rounded to the same number of bits as single or double
4961 | precision, respectively.  Otherwise, the result is rounded to the full
4962 | precision of the extended double-precision format.
4963 |     The input significand must be normalized or smaller.  If the input
4964 | significand is not normalized, `zExp' must be 0; in that case, the result
4965 | returned is a subnormal number, and it must not require rounding.  The
4966 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4967 | Floating-Point Arithmetic.
4968 *----------------------------------------------------------------------------*/
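
     /*
      * Concretely: rounding to double keeps the top 53 of the 64
      * significand bits, so roundMask covers the low 11 bits (0x7FF) and
      * roundIncrement is half an ulp at that width (0x400).  Rounding to
      * single keeps 24 bits: mask 0xFFFFFFFFFF, increment 1 << 39.
      */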
4969 
4970 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4971                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4972                               float_status *status)
4973 {
4974     FloatRoundMode roundingMode;
4975     bool roundNearestEven, increment, isTiny;
4976     int64_t roundIncrement, roundMask, roundBits;
4977 
4978     roundingMode = status->float_rounding_mode;
4979     roundNearestEven = ( roundingMode == float_round_nearest_even );
4980     switch (roundingPrecision) {
4981     case floatx80_precision_x:
4982         goto precision80;
4983     case floatx80_precision_d:
4984         roundIncrement = UINT64_C(0x0000000000000400);
4985         roundMask = UINT64_C(0x00000000000007FF);
4986         break;
4987     case floatx80_precision_s:
4988         roundIncrement = UINT64_C(0x0000008000000000);
4989         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4990         break;
4991     default:
4992         g_assert_not_reached();
4993     }
4994     zSig0 |= ( zSig1 != 0 );
4995     switch (roundingMode) {
4996     case float_round_nearest_even:
4997     case float_round_ties_away:
4998         break;
4999     case float_round_to_zero:
5000         roundIncrement = 0;
5001         break;
5002     case float_round_up:
5003         roundIncrement = zSign ? 0 : roundMask;
5004         break;
5005     case float_round_down:
5006         roundIncrement = zSign ? roundMask : 0;
5007         break;
5008     default:
5009         abort();
5010     }
5011     roundBits = zSig0 & roundMask;
5012     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
5013         if (    ( 0x7FFE < zExp )
5014              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
5015            ) {
5016             goto overflow;
5017         }
5018         if ( zExp <= 0 ) {
5019             if (status->flush_to_zero) {
5020                 float_raise(float_flag_output_denormal, status);
5021                 return packFloatx80(zSign, 0, 0);
5022             }
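                 /*
                  * Tiny if detected before rounding, if the exponent is
                  * strictly negative, or if adding roundIncrement does
                  * not overflow the 64-bit significand, i.e. rounding
                  * cannot promote the value to the smallest normal.
                  */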
5023             isTiny = status->tininess_before_rounding
5024                   || (zExp < 0 )
5025                   || (zSig0 <= zSig0 + roundIncrement);
5026             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
5027             zExp = 0;
5028             roundBits = zSig0 & roundMask;
5029             if (isTiny && roundBits) {
5030                 float_raise(float_flag_underflow, status);
5031             }
5032             if (roundBits) {
5033                 float_raise(float_flag_inexact, status);
5034             }
5035             zSig0 += roundIncrement;
5036             if ( (int64_t) zSig0 < 0 ) zExp = 1;
5037             roundIncrement = roundMask + 1;
5038             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5039                 roundMask |= roundIncrement;
5040             }
5041             zSig0 &= ~ roundMask;
5042             return packFloatx80( zSign, zExp, zSig0 );
5043         }
5044     }
5045     if (roundBits) {
5046         float_raise(float_flag_inexact, status);
5047     }
5048     zSig0 += roundIncrement;
5049     if ( zSig0 < roundIncrement ) {
5050         ++zExp;
5051         zSig0 = UINT64_C(0x8000000000000000);
5052     }
5053     roundIncrement = roundMask + 1;
5054     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5055         roundMask |= roundIncrement;
5056     }
5057     zSig0 &= ~ roundMask;
5058     if ( zSig0 == 0 ) zExp = 0;
5059     return packFloatx80( zSign, zExp, zSig0 );
5060  precision80:
5061     switch (roundingMode) {
5062     case float_round_nearest_even:
5063     case float_round_ties_away:
5064         increment = ((int64_t)zSig1 < 0);
5065         break;
5066     case float_round_to_zero:
5067         increment = 0;
5068         break;
5069     case float_round_up:
5070         increment = !zSign && zSig1;
5071         break;
5072     case float_round_down:
5073         increment = zSign && zSig1;
5074         break;
5075     default:
5076         abort();
5077     }
5078     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
5079         if (    ( 0x7FFE < zExp )
5080              || (    ( zExp == 0x7FFE )
5081                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
5082                   && increment
5083                 )
5084            ) {
5085             roundMask = 0;
5086  overflow:
5087             float_raise(float_flag_overflow | float_flag_inexact, status);
5088             if (    ( roundingMode == float_round_to_zero )
5089                  || ( zSign && ( roundingMode == float_round_up ) )
5090                  || ( ! zSign && ( roundingMode == float_round_down ) )
5091                ) {
5092                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
5093             }
5094             return packFloatx80(zSign,
5095                                 floatx80_infinity_high,
5096                                 floatx80_infinity_low);
5097         }
5098         if ( zExp <= 0 ) {
5099             isTiny = status->tininess_before_rounding
5100                   || (zExp < 0)
5101                   || !increment
5102                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
5103             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
5104             zExp = 0;
5105             if (isTiny && zSig1) {
5106                 float_raise(float_flag_underflow, status);
5107             }
5108             if (zSig1) {
5109                 float_raise(float_flag_inexact, status);
5110             }
5111             switch (roundingMode) {
5112             case float_round_nearest_even:
5113             case float_round_ties_away:
5114                 increment = ((int64_t)zSig1 < 0);
5115                 break;
5116             case float_round_to_zero:
5117                 increment = 0;
5118                 break;
5119             case float_round_up:
5120                 increment = !zSign && zSig1;
5121                 break;
5122             case float_round_down:
5123                 increment = zSign && zSig1;
5124                 break;
5125             default:
5126                 abort();
5127             }
5128             if ( increment ) {
5129                 ++zSig0;
5130                 if (!(zSig1 << 1) && roundNearestEven) {
5131                     zSig0 &= ~1;
5132                 }
5133                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
5134             }
5135             return packFloatx80( zSign, zExp, zSig0 );
5136         }
5137     }
5138     if (zSig1) {
5139         float_raise(float_flag_inexact, status);
5140     }
5141     if ( increment ) {
5142         ++zSig0;
5143         if ( zSig0 == 0 ) {
5144             ++zExp;
5145             zSig0 = UINT64_C(0x8000000000000000);
5146         }
5147         else {
5148             if (!(zSig1 << 1) && roundNearestEven) {
5149                 zSig0 &= ~1;
5150             }
5151         }
5152     }
5153     else {
5154         if ( zSig0 == 0 ) zExp = 0;
5155     }
5156     return packFloatx80( zSign, zExp, zSig0 );
5157 
5158 }

/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input.  This routine is just like
| `roundAndPackFloatx80' except that the input significand does not have to be
| normalized.
*----------------------------------------------------------------------------*/

floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
                                       bool zSign, int32_t zExp,
                                       uint64_t zSig0, uint64_t zSig1,
                                       float_status *status)
{
    int8_t shiftCount;

    /* If the high word is empty, promote the low word into its place. */
    if ( zSig0 == 0 ) {
        zSig0 = zSig1;
        zSig1 = 0;
        zExp -= 64;
    }
    shiftCount = clz64(zSig0);
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    zExp -= shiftCount;
    return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
                                zSig0, zSig1, status);
}
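
/*----------------------------------------------------------------------------
| Illustration only: the normalization step above amounts to a count of
| leading zeros followed by a left shift, with the exponent adjusted to
| compensate.  The guarded standalone sketch below shows the same idea for a
| single 64-bit significand; the helper name `norm_sig64' is hypothetical
| and not part of this file.
*----------------------------------------------------------------------------*/

#if 0
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper: shift `sig' left until bit 63 is set, charging one
 * exponent step per shifted bit.  `sig' must be non-zero. */
static uint64_t norm_sig64(uint64_t sig, int32_t *exp)
{
    int shift = __builtin_clzll(sig);
    *exp -= shift;
    return sig << shift;
}

int main(void)
{
    int32_t exp = 0x3FFF;
    uint64_t sig = norm_sig64(UINT64_C(0x0000123400000000), &exp);
    printf("sig=%016" PRIx64 " exp=%d\n", sig, exp);  /* bit 63 is now set */
    return 0;
}
#endif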

/*----------------------------------------------------------------------------
| Returns the binary exponential of the single-precision floating-point value
| `a'. The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
|
| Uses the following identities:
|
| 1. -------------------------------------------------------------------------
|      x    x*ln(2)
|     2  = e
|
| 2. -------------------------------------------------------------------------
|                      2     3     4     5           n
|      x        x     x     x     x     x           x
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
|               1!    2!    3!    4!    5!          n!
*----------------------------------------------------------------------------*/

/* Entry n is the float64 encoding of 1/n!, the Taylor coefficients of e**x. */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
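
/*----------------------------------------------------------------------------
| Illustration only: the table above holds the IEEE-754 double encodings of
| the reciprocal factorials 1/n! for n = 1..15.  The guarded standalone
| program below recomputes each constant; n! is exactly representable up to
| 15!, so the divisions are correctly rounded and should reproduce the hex
| values above.
*----------------------------------------------------------------------------*/

#if 0
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    double fact = 1.0;
    for (int n = 1; n <= 15; n++) {
        double c;
        uint64_t bits;

        fact *= n;                       /* exact: 15! < 2**53 */
        c = 1.0 / fact;                  /* correctly rounded 1/n! */
        memcpy(&bits, &c, sizeof(bits)); /* reinterpret as raw bits */
        printf("/* %2d */ 0x%016" PRIx64 "\n", n, bits);
    }
    return 0;
}
#endif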

float32 float32_exp2(float32 a, float_status *status)
{
    FloatParts64 xp, xnp, tp, rp;
    int i;

    float32_unpack_canonical(&xp, a, status);
    if (unlikely(xp.cls != float_class_normal)) {
        switch (xp.cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(&xp, status);
            return float32_round_pack_canonical(&xp, status);
        case float_class_inf:
            return xp.sign ? float32_zero : a;
        case float_class_zero:
            return float32_one;
        default:
            break;
        }
        g_assert_not_reached();
    }

    float_raise(float_flag_inexact, status);

    /* x = a * ln(2), so that 2**a = e**x (identity 1 above). */
    float64_unpack_canonical(&tp, float64_ln2, status);
    xp = *parts_mul(&xp, &tp, status);
    xnp = xp;

    /* Evaluate identity 2: accumulate 1 + sum(x**n / n!) for n = 1..15,
     * with xnp tracking the running power x**(i+1). */
    float64_unpack_canonical(&rp, float64_one, status);
    for (i = 0 ; i < 15 ; i++) {
        float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
        rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status);
        xnp = *parts_mul(&xnp, &xp, status);
    }

    return float32_round_pack_canonical(&rp, status);
}
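
/*----------------------------------------------------------------------------
| Usage sketch, not part of this file: assuming the usual softfloat helpers
| (`make_float32' builds a float32 from raw bits), 2**1.0 should come back
| as 2.0f, with the inexact flag raised by the polynomial evaluation.
*----------------------------------------------------------------------------*/

#if 0
/* Inside some caller: */
float_status status = { .float_rounding_mode = float_round_nearest_even };
float32 one = make_float32(0x3f800000);   /* 1.0f */
float32 two = float32_exp2(one, &status); /* expect 0x40000000, i.e. 2.0f */
#endif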

/*----------------------------------------------------------------------------
| Rounds the extended double-precision floating-point value `a' to the
| rounding precision selected by `status->floatx80_rounding_precision' and
| returns the result as an extended double-precision floating-point value.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_round(floatx80 a, float_status *status)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }
    return floatx80_round_pack_canonical(&p, status);
}
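
/*----------------------------------------------------------------------------
| Usage sketch, not part of this file: assuming `make_floatx80' from the
| softfloat headers, selecting 32-bit rounding precision makes
| floatx80_round() keep only the top 24 significand bits of an 80-bit value.
*----------------------------------------------------------------------------*/

#if 0
/* Inside some caller: */
float_status status = { .float_rounding_mode = float_round_nearest_even };
floatx80 pi, r;

status.floatx80_rounding_precision = floatx80_precision_s;
pi = make_floatx80(0x4000, UINT64_C(0xc90fdaa22168c235)); /* ~pi */
r = floatx80_round(pi, &status);  /* significand rounded to 24 bits */
#endif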

static void __attribute__((constructor)) softfloat_init(void)
{
    union_float64 ua, ub, uc, ur;

    if (QEMU_NO_HARDFLOAT) {
        return;
    }
    /*
     * Test that the host's FMA is not obviously broken. For example,
     * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
     *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
     */
    ua.s = 0x0020000000000001ULL;   /* a = (1 + 2**-52) * 2**-1021 */
    ub.s = 0x3ca0000000000000ULL;   /* b = 2**-53 */
    uc.s = 0x0020000000000000ULL;   /* c = 2**-1021 */
    /* a*b is just over half an ulp of c, so a correct FMA rounds up. */
    ur.h = fma(ua.h, ub.h, uc.h);
    if (ur.s != 0x0020000000000001ULL) {
        force_soft_fma = true;
    }
}
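
/*----------------------------------------------------------------------------
| Illustration only: why the probe above detects a broken (double-rounding)
| FMA.  With a = (1 + 2**-52) * 2**-1021, b = 2**-53 and c = 2**-1021, the
| product a*b is just over half an ulp of c.  A correct fused multiply-add
| therefore rounds up to the odd neighbour of c, whereas a separate multiply
| (which first rounds a*b down to 2**-1074) followed by an add lands on an
| exact tie and rounds to even, staying at c.  Compile the guarded program
| below with -ffp-contract=off and link with -lm to see both results.
*----------------------------------------------------------------------------*/

#if 0
#include <inttypes.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t dbits(double d)
{
    uint64_t u;
    memcpy(&u, &d, sizeof(u));
    return u;
}

static double bits2d(uint64_t u)
{
    double d;
    memcpy(&d, &u, sizeof(d));
    return d;
}

int main(void)
{
    double a = bits2d(0x0020000000000001ULL);
    double b = bits2d(0x3ca0000000000000ULL);
    double c = bits2d(0x0020000000000000ULL);

    printf("fma:     %016" PRIx64 "\n", dbits(fma(a, b, c))); /* ...0001 */
    printf("mul+add: %016" PRIx64 "\n", dbits(a * b + c));    /* ...0000 */
    return 0;
}
#endif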