xref: /qemu/target/hexagon/fma_emu.c (revision ccf86c392c5b8949bafd363e44d3abb112578044)
1  /*
2   *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
3   *
4   *  This program is free software; you can redistribute it and/or modify
5   *  it under the terms of the GNU General Public License as published by
6   *  the Free Software Foundation; either version 2 of the License, or
7   *  (at your option) any later version.
8   *
9   *  This program is distributed in the hope that it will be useful,
10   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   *  GNU General Public License for more details.
13   *
14   *  You should have received a copy of the GNU General Public License
15   *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16   */
17  
18  #include "qemu/osdep.h"
19  #include "qemu/int128.h"
20  #include "fpu/softfloat.h"
21  #include "macros.h"
22  #include "fma_emu.h"
23  
24  #define DF_INF_EXP     0x7ff
25  #define DF_BIAS        1023
26  #define DF_MANTBITS    52
27  #define DF_NAN         0xffffffffffffffffULL
28  #define DF_INF         0x7ff0000000000000ULL
29  #define DF_MINUS_INF   0xfff0000000000000ULL
30  #define DF_MAXF        0x7fefffffffffffffULL
31  #define DF_MINUS_MAXF  0xffefffffffffffffULL
32  
33  #define SF_INF_EXP     0xff
34  #define SF_BIAS        127
35  #define SF_MANTBITS    23
36  #define SF_INF         0x7f800000
37  #define SF_MINUS_INF   0xff800000
38  #define SF_MAXF        0x7f7fffff
39  #define SF_MINUS_MAXF  0xff7fffff
40  
41  #define HF_INF_EXP 0x1f
42  #define HF_BIAS 15
43  
44  #define WAY_BIG_EXP 4096
45  
46  static uint64_t float64_getmant(float64 f64)
47  {
48      uint64_t mant = extract64(f64, 0, 52);
49      if (float64_is_normal(f64)) {
50          return mant | 1ULL << 52;
51      }
52      if (float64_is_zero(f64)) {
53          return 0;
54      }
55      if (float64_is_denormal(f64)) {
56          return mant;
57      }
58      return ~0ULL;
59  }
60  
61  int32_t float64_getexp(float64 f64)
62  {
63      int exp = extract64(f64, 52, 11);
64      if (float64_is_normal(f64)) {
65          return exp;
66      }
67      if (float64_is_denormal(f64)) {
68          return exp + 1;
69      }
70      return -1;
71  }
72  
73  int32_t float32_getexp(float32 f32)
74  {
75      int exp = float32_getexp_raw(f32);
76      if (float32_is_normal(f32)) {
77          return exp;
78      }
79      if (float32_is_denormal(f32)) {
80          return exp + 1;
81      }
82      return -1;
83  }
84  
85  static Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
86  {
87      uint64_t l, h;
88  
89      mulu64(&l, &h, ai, bi);
90      return int128_make128(l, h);
91  }
92  
93  static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
94  {
95      Int128 ret = int128_sub(a, b);
96      if (borrow != 0) {
97          ret = int128_sub(ret, int128_one());
98      }
99      return ret;
100  }
101  
/*
 * Wide accumulator used to emulate fused multiply-add: a 128-bit
 * mantissa plus the guard/round/sticky bits needed for IEEE rounding.
 */
typedef struct {
    Int128 mant;    /* wide significand, not kept normalized */
    int32_t exp;    /* biased exponent */
    uint8_t sign;   /* 1 == negative */
    uint8_t guard;  /* first bit shifted out below mant */
    uint8_t round;  /* second bit shifted out below mant */
    uint8_t sticky; /* OR of all lower bits shifted out */
} Accum;
110  
111  static void accum_init(Accum *p)
112  {
113      p->mant = int128_zero();
114      p->exp = 0;
115      p->sign = 0;
116      p->guard = 0;
117      p->round = 0;
118      p->sticky = 0;
119  }
120  
121  static Accum accum_norm_left(Accum a)
122  {
123      a.exp--;
124      a.mant = int128_lshift(a.mant, 1);
125      a.mant = int128_or(a.mant, int128_make64(a.guard));
126      a.guard = a.round;
127      a.round = a.sticky;
128      return a;
129  }
130  
/*
 * Shift the accumulator right by 'amt' bits, incrementing the exponent to
 * compensate.  Bits shifted out flow through guard -> round -> sticky so
 * that no information relevant to rounding is lost.
 * This function is marked inline for performance reasons
 */
static inline Accum accum_norm_right(Accum a, int amt)
{
    if (amt > 130) {
        /* Everything is shifted out; only the sticky summary survives */
        a.sticky |=
            a.round | a.guard | int128_nz(a.mant);
        a.guard = a.round = 0;
        a.mant = int128_zero();
        a.exp += amt;
        return a;

    }
    /* Take whole 64-bit word steps first */
    while (amt >= 64) {
        a.sticky |= a.round | a.guard | (int128_getlo(a.mant) != 0);
        a.guard = (int128_getlo(a.mant) >> 63) & 1;
        a.round = (int128_getlo(a.mant) >> 62) & 1;
        a.mant = int128_make64(int128_gethi(a.mant));
        a.exp += 64;
        amt -= 64;
    }
    /* Then finish bit by bit */
    while (amt > 0) {
        a.exp++;
        a.sticky |= a.round;
        a.round = a.guard;
        a.guard = int128_getlo(a.mant) & 1;
        a.mant = int128_rshift(a.mant, 1);
        amt--;
    }
    return a;
}
161  
162  /*
163   * On the add/sub, we need to be able to shift out lots of bits, but need a
164   * sticky bit for what was shifted out, I think.
165   */
166  static Accum accum_add(Accum a, Accum b);
167  
/*
 * Magnitude subtraction of accumulators with equal signs; mixed signs are
 * forwarded to accum_add.  'negate' flips the result's sign, which is how
 * "small - big" is rewritten as "-(big - small)".
 */
static Accum accum_sub(Accum a, Accum b, int negate)
{
    Accum ret;
    accum_init(&ret);
    int borrow;

    /* Opposite signs: a - (-b) == a + b */
    if (a.sign != b.sign) {
        b.sign = !b.sign;
        return accum_add(a, b);
    }
    if (b.exp > a.exp) {
        /* small - big == - (big - small) */
        return accum_sub(b, a, !negate);
    }
    if ((b.exp == a.exp) && (int128_gt(b.mant, a.mant))) {
        /* small - big == - (big - small) */
        return accum_sub(b, a, !negate);
    }

    while (a.exp > b.exp) {
        /* Try to normalize exponents: shrink a exponent and grow mantissa */
        if (int128_gethi(a.mant) & (1ULL << 62)) {
            /* Can't grow a any more */
            break;
        } else {
            a = accum_norm_left(a);
        }
    }

    while (a.exp > b.exp) {
        /* Try to normalize exponents: grow b exponent and shrink mantissa */
        /* Keep around shifted out bits... we might need those later */
        b = accum_norm_right(b, a.exp - b.exp);
    }

    /* Alignment may have revealed that b's mantissa is actually larger */
    if ((int128_gt(b.mant, a.mant))) {
        return accum_sub(b, a, !negate);
    }

    /* OK, now things should be normalized! */
    ret.sign = a.sign;
    ret.exp = a.exp;
    assert(!int128_gt(b.mant, a.mant));
    /*
     * Fold b's guard/round/sticky into a single borrow out of the mantissa,
     * then recover the result's GRS bits from its two's-complement negation.
     * NOTE(review): the pack places round at bit 2 and guard at bit 1, but
     * the unpack reads guard from bit 2 and round from bit 1 — looks
     * asymmetric; confirm against the hardware model before changing.
     */
    borrow = (b.round << 2) | (b.guard << 1) | b.sticky;
    ret.mant = int128_sub_borrow(a.mant, b.mant, (borrow != 0));
    borrow = 0 - borrow;
    ret.guard = (borrow >> 2) & 1;
    ret.round = (borrow >> 1) & 1;
    ret.sticky = (borrow >> 0) & 1;
    if (negate) {
        ret.sign = !ret.sign;
    }
    return ret;
}
222  
223  static Accum accum_add(Accum a, Accum b)
224  {
225      Accum ret;
226      accum_init(&ret);
227      if (a.sign != b.sign) {
228          b.sign = !b.sign;
229          return accum_sub(a, b, 0);
230      }
231      if (b.exp > a.exp) {
232          /* small + big ==  (big + small) */
233          return accum_add(b, a);
234      }
235      if ((b.exp == a.exp) && int128_gt(b.mant, a.mant)) {
236          /* small + big ==  (big + small) */
237          return accum_add(b, a);
238      }
239  
240      while (a.exp > b.exp) {
241          /* Try to normalize exponents: shrink a exponent and grow mantissa */
242          if (int128_gethi(a.mant) & (1ULL << 62)) {
243              /* Can't grow a any more */
244              break;
245          } else {
246              a = accum_norm_left(a);
247          }
248      }
249  
250      while (a.exp > b.exp) {
251          /* Try to normalize exponents: grow b exponent and shrink mantissa */
252          /* Keep around shifted out bits... we might need those later */
253          b = accum_norm_right(b, a.exp - b.exp);
254      }
255  
256      /* OK, now things should be normalized! */
257      if (int128_gt(b.mant, a.mant)) {
258          return accum_add(b, a);
259      };
260      ret.sign = a.sign;
261      ret.exp = a.exp;
262      assert(!int128_gt(b.mant, a.mant));
263      ret.mant = int128_add(a.mant, b.mant);
264      ret.guard = b.guard;
265      ret.round = b.round;
266      ret.sticky = b.sticky;
267      return ret;
268  }
269  
270  /* Return an infinity with requested sign */
271  static float64 infinite_float64(uint8_t sign)
272  {
273      if (sign) {
274          return make_float64(DF_MINUS_INF);
275      } else {
276          return make_float64(DF_INF);
277      }
278  }
279  
280  /* Return a maximum finite value with requested sign */
281  static float64 maxfinite_float64(uint8_t sign)
282  {
283      if (sign) {
284          return make_float64(DF_MINUS_MAXF);
285      } else {
286          return make_float64(DF_MAXF);
287      }
288  }
289  
290  /* Return a zero value with requested sign */
291  static float64 zero_float64(uint8_t sign)
292  {
293      if (sign) {
294          return make_float64(0x8000000000000000);
295      } else {
296          return float64_zero;
297      }
298  }
299  
300  /* Return an infinity with the requested sign */
301  float32 infinite_float32(uint8_t sign)
302  {
303      if (sign) {
304          return make_float32(SF_MINUS_INF);
305      } else {
306          return make_float32(SF_INF);
307      }
308  }
309  
/*
 * Round the accumulator per the current rounding mode and pack the result
 * into an IEEE double, raising inexact/underflow/overflow as appropriate.
 * (The previous one-line comment here described maxfinite_float64 — a
 * copy-paste leftover.)
 */
static float64 accum_round_float64(Accum a, float_status *fp_status)
{
    uint64_t ret;

    /* Exact zero: only the rounding mode decides the zero's sign */
    if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
        && ((a.guard | a.round | a.sticky) == 0)) {
        /* result zero */
        switch (fp_status->float_rounding_mode) {
        case float_round_down:
            return zero_float64(1);
        default:
            return zero_float64(0);
        }
    }
    /*
     * Normalize right
     * We want DF_MANTBITS bits of mantissa plus the leading one.
     * That means that we want DF_MANTBITS+1 bits, or 0x000000000000FF_FFFF
     * So we need to normalize right while the high word is non-zero and
     * while the low word is nonzero when masked with 0xffe0_0000_0000_0000
     */
    while ((int128_gethi(a.mant) != 0) ||
           ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0)) {
        a = accum_norm_right(a, 1);
    }
    /*
     * OK, now normalize left
     * We want to normalize left until we have a leading one in bit 24
     * Theoretically, we only need to shift a maximum of one to the left if we
     * shifted out lots of bits from B, or if we had no shift / 1 shift sticky
     * should be 0
     */
    while ((int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) == 0) {
        a = accum_norm_left(a);
    }
    /*
     * OK, now we might need to denormalize because of potential underflow.
     * We need to do this before rounding, and rounding might make us normal
     * again
     */
    while (a.exp <= 0) {
        a = accum_norm_right(a, 1 - a.exp);
        /*
         * Do we have underflow?
         * That's when we get an inexact answer because we ran out of bits
         * in a denormal.
         */
        if (a.guard || a.round || a.sticky) {
            float_raise(float_flag_underflow, fp_status);
        }
    }
    /* OK, we're relatively canonical... now we need to round */
    if (a.guard || a.round || a.sticky) {
        float_raise(float_flag_inexact, fp_status);
        switch (fp_status->float_rounding_mode) {
        case float_round_to_zero:
            /* Chop and we're done */
            break;
        case float_round_up:
            if (a.sign == 0) {
                a.mant = int128_add(a.mant, int128_one());
            }
            break;
        case float_round_down:
            if (a.sign != 0) {
                a.mant = int128_add(a.mant, int128_one());
            }
            break;
        default:
            /* Round-to-nearest-even */
            if (a.round || a.sticky) {
                /* round up if guard is 1, down if guard is zero */
                a.mant = int128_add(a.mant, int128_make64(a.guard));
            } else if (a.guard) {
                /* exactly .5, round up if odd */
                a.mant = int128_add(a.mant, int128_and(a.mant, int128_one()));
            }
            break;
        }
    }
    /*
     * OK, now we might have carried all the way up.
     * So we might need to shr once
     * at least we know that the lsb should be zero if we rounded and
     * got a carry out...
     */
    if ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0) {
        a = accum_norm_right(a, 1);
    }
    /* Overflow? */
    if (a.exp >= DF_INF_EXP) {
        /* Yep, inf result */
        float_raise(float_flag_overflow, fp_status);
        float_raise(float_flag_inexact, fp_status);
        switch (fp_status->float_rounding_mode) {
        case float_round_to_zero:
            return maxfinite_float64(a.sign);
        case float_round_up:
            /* Directed rounding saturates at max finite toward zero */
            if (a.sign == 0) {
                return infinite_float64(a.sign);
            } else {
                return maxfinite_float64(a.sign);
            }
        case float_round_down:
            if (a.sign != 0) {
                return infinite_float64(a.sign);
            } else {
                return maxfinite_float64(a.sign);
            }
        default:
            return infinite_float64(a.sign);
        }
    }
    /* Underflow? */
    ret = int128_getlo(a.mant);
    if (ret & (1ULL << DF_MANTBITS)) {
        /* Leading one means: No, we're normal. So, we should be done... */
        ret = deposit64(ret, 52, 11, a.exp);
    } else {
        /* Denormal result: the exponent field must encode as zero */
        assert(a.exp == 1);
        ret = deposit64(ret, 52, 11, 0);
    }
    ret = deposit64(ret, 63, 1, a.sign);
    return ret;
}
435  
/*
 * Multiply the upper 32 bits of the mantissas of a and b and fold the
 * pre-shifted partial sum 'accumulated' (low bit = sticky) into the
 * result, rounding to float64.  Zero/NaN/Inf operands are delegated to
 * softfloat's float64_mul, which handles the special cases.
 */
float64 internal_mpyhh(float64 a, float64 b,
                      unsigned long long int accumulated,
                      float_status *fp_status)
{
    Accum x;
    unsigned long long int prod;
    unsigned int sticky;
    uint8_t a_sign, b_sign;

    /* Low bit of the accumulated value carries the sticky information */
    sticky = accumulated & 1;
    accumulated >>= 1;
    accum_init(&x);
    if (float64_is_zero(a) ||
        float64_is_any_nan(a) ||
        float64_is_infinity(a)) {
        return float64_mul(a, b, fp_status);
    }
    if (float64_is_zero(b) ||
        float64_is_any_nan(b) ||
        float64_is_infinity(b)) {
        return float64_mul(a, b, fp_status);
    }
    x.mant = int128_make64(accumulated);
    x.sticky = sticky;
    /* 32x32 product of the top words of the two mantissas */
    prod = fGETUWORD(1, float64_getmant(a)) * fGETUWORD(1, float64_getmant(b));
    /* Scale the product by 2^32 before adding it into the accumulator */
    x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL));
    /*
     * -20 presumably compensates for using only the high mantissa words
     * and the accumulated shift — TODO confirm against the hardware model.
     */
    x.exp = float64_getexp(a) + float64_getexp(b) - DF_BIAS - 20;
    if (!float64_is_normal(a) || !float64_is_normal(b)) {
        /* crush to inexact zero */
        x.sticky = 1;
        x.exp = -4096;
    }
    a_sign = float64_is_neg(a);
    b_sign = float64_is_neg(b);
    x.sign = a_sign ^ b_sign;
    return accum_round_float64(x, fp_status);
}
473