1 /* 2 * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #include "qemu/osdep.h" 19 #include "qemu/int128.h" 20 #include "fpu/softfloat.h" 21 #include "macros.h" 22 #include "fma_emu.h" 23 24 #define DF_INF_EXP 0x7ff 25 #define DF_BIAS 1023 26 #define DF_MANTBITS 52 27 #define DF_NAN 0xffffffffffffffffULL 28 #define DF_INF 0x7ff0000000000000ULL 29 #define DF_MINUS_INF 0xfff0000000000000ULL 30 #define DF_MAXF 0x7fefffffffffffffULL 31 #define DF_MINUS_MAXF 0xffefffffffffffffULL 32 33 #define SF_INF_EXP 0xff 34 #define SF_BIAS 127 35 #define SF_MANTBITS 23 36 #define SF_INF 0x7f800000 37 #define SF_MINUS_INF 0xff800000 38 #define SF_MAXF 0x7f7fffff 39 #define SF_MINUS_MAXF 0xff7fffff 40 41 #define HF_INF_EXP 0x1f 42 #define HF_BIAS 15 43 44 #define WAY_BIG_EXP 4096 45 46 static uint64_t float64_getmant(float64 f64) 47 { 48 uint64_t mant = extract64(f64, 0, 52); 49 if (float64_is_normal(f64)) { 50 return mant | 1ULL << 52; 51 } 52 if (float64_is_zero(f64)) { 53 return 0; 54 } 55 if (float64_is_denormal(f64)) { 56 return mant; 57 } 58 return ~0ULL; 59 } 60 61 int32_t float64_getexp(float64 f64) 62 { 63 int exp = extract64(f64, 52, 11); 64 if (float64_is_normal(f64)) { 65 return exp; 66 } 67 if (float64_is_denormal(f64)) { 68 return exp + 1; 69 } 70 return -1; 71 } 72 73 int32_t float32_getexp(float32 f32) 74 { 75 int exp = float32_getexp_raw(f32); 76 if (float32_is_normal(f32)) { 77 return exp; 78 } 79 if (float32_is_denormal(f32)) { 80 return exp + 1; 81 } 82 return -1; 83 } 84 85 static Int128 int128_mul_6464(uint64_t ai, uint64_t bi) 86 { 87 uint64_t l, h; 88 89 mulu64(&l, &h, ai, bi); 90 return int128_make128(l, h); 91 } 92 93 static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow) 94 { 95 Int128 ret = int128_sub(a, b); 96 if (borrow != 0) { 97 ret = int128_sub(ret, int128_one()); 98 } 99 return ret; 100 } 101 102 typedef struct { 103 Int128 mant; 104 int32_t exp; 105 uint8_t sign; 106 uint8_t guard; 107 uint8_t round; 108 uint8_t sticky; 109 } Accum; 110 111 static void accum_init(Accum *p) 112 { 113 p->mant = int128_zero(); 114 p->exp = 0; 115 p->sign = 0; 116 p->guard = 0; 117 p->round = 0; 118 p->sticky = 0; 119 } 120 121 static Accum accum_norm_left(Accum a) 122 { 123 a.exp--; 124 a.mant = int128_lshift(a.mant, 1); 125 a.mant = int128_or(a.mant, int128_make64(a.guard)); 126 a.guard = a.round; 127 a.round = a.sticky; 128 return a; 129 } 130 131 /* This function is marked inline for performance reasons */ 132 static inline Accum accum_norm_right(Accum a, int amt) 133 { 134 if (amt > 130) { 135 a.sticky |= 136 a.round | a.guard | int128_nz(a.mant); 137 a.guard = a.round = 0; 138 a.mant = int128_zero(); 139 a.exp += amt; 140 return a; 141 142 } 143 while (amt >= 64) { 144 a.sticky |= a.round | a.guard | (int128_getlo(a.mant) != 0); 145 a.guard = (int128_getlo(a.mant) >> 63) & 1; 146 a.round = (int128_getlo(a.mant) >> 62) & 1; 147 a.mant = int128_make64(int128_gethi(a.mant)); 148 a.exp += 64; 149 amt -= 64; 150 } 151 while (amt > 0) { 152 a.exp++; 153 a.sticky |= a.round; 154 a.round = a.guard; 155 a.guard = int128_getlo(a.mant) & 1; 156 a.mant = int128_rshift(a.mant, 1); 157 amt--; 158 } 159 return a; 160 } 161 162 /* 163 * On the add/sub, we need to be able to shift out lots of bits, but need a 164 * sticky bit for what was shifted out, I think. 165 */ 166 static Accum accum_add(Accum a, Accum b); 167 168 static Accum accum_sub(Accum a, Accum b, int negate) 169 { 170 Accum ret; 171 accum_init(&ret); 172 int borrow; 173 174 if (a.sign != b.sign) { 175 b.sign = !b.sign; 176 return accum_add(a, b); 177 } 178 if (b.exp > a.exp) { 179 /* small - big == - (big - small) */ 180 return accum_sub(b, a, !negate); 181 } 182 if ((b.exp == a.exp) && (int128_gt(b.mant, a.mant))) { 183 /* small - big == - (big - small) */ 184 return accum_sub(b, a, !negate); 185 } 186 187 while (a.exp > b.exp) { 188 /* Try to normalize exponents: shrink a exponent and grow mantissa */ 189 if (int128_gethi(a.mant) & (1ULL << 62)) { 190 /* Can't grow a any more */ 191 break; 192 } else { 193 a = accum_norm_left(a); 194 } 195 } 196 197 while (a.exp > b.exp) { 198 /* Try to normalize exponents: grow b exponent and shrink mantissa */ 199 /* Keep around shifted out bits... we might need those later */ 200 b = accum_norm_right(b, a.exp - b.exp); 201 } 202 203 if ((int128_gt(b.mant, a.mant))) { 204 return accum_sub(b, a, !negate); 205 } 206 207 /* OK, now things should be normalized! */ 208 ret.sign = a.sign; 209 ret.exp = a.exp; 210 assert(!int128_gt(b.mant, a.mant)); 211 borrow = (b.round << 2) | (b.guard << 1) | b.sticky; 212 ret.mant = int128_sub_borrow(a.mant, b.mant, (borrow != 0)); 213 borrow = 0 - borrow; 214 ret.guard = (borrow >> 2) & 1; 215 ret.round = (borrow >> 1) & 1; 216 ret.sticky = (borrow >> 0) & 1; 217 if (negate) { 218 ret.sign = !ret.sign; 219 } 220 return ret; 221 } 222 223 static Accum accum_add(Accum a, Accum b) 224 { 225 Accum ret; 226 accum_init(&ret); 227 if (a.sign != b.sign) { 228 b.sign = !b.sign; 229 return accum_sub(a, b, 0); 230 } 231 if (b.exp > a.exp) { 232 /* small + big == (big + small) */ 233 return accum_add(b, a); 234 } 235 if ((b.exp == a.exp) && int128_gt(b.mant, a.mant)) { 236 /* small + big == (big + small) */ 237 return accum_add(b, a); 238 } 239 240 while (a.exp > b.exp) { 241 /* Try to normalize exponents: shrink a exponent and grow mantissa */ 242 if (int128_gethi(a.mant) & (1ULL << 62)) { 243 /* Can't grow a any more */ 244 break; 245 } else { 246 a = accum_norm_left(a); 247 } 248 } 249 250 while (a.exp > b.exp) { 251 /* Try to normalize exponents: grow b exponent and shrink mantissa */ 252 /* Keep around shifted out bits... we might need those later */ 253 b = accum_norm_right(b, a.exp - b.exp); 254 } 255 256 /* OK, now things should be normalized! */ 257 if (int128_gt(b.mant, a.mant)) { 258 return accum_add(b, a); 259 }; 260 ret.sign = a.sign; 261 ret.exp = a.exp; 262 assert(!int128_gt(b.mant, a.mant)); 263 ret.mant = int128_add(a.mant, b.mant); 264 ret.guard = b.guard; 265 ret.round = b.round; 266 ret.sticky = b.sticky; 267 return ret; 268 } 269 270 /* Return an infinity with requested sign */ 271 static float64 infinite_float64(uint8_t sign) 272 { 273 if (sign) { 274 return make_float64(DF_MINUS_INF); 275 } else { 276 return make_float64(DF_INF); 277 } 278 } 279 280 /* Return a maximum finite value with requested sign */ 281 static float64 maxfinite_float64(uint8_t sign) 282 { 283 if (sign) { 284 return make_float64(DF_MINUS_MAXF); 285 } else { 286 return make_float64(DF_MAXF); 287 } 288 } 289 290 /* Return a zero value with requested sign */ 291 static float64 zero_float64(uint8_t sign) 292 { 293 if (sign) { 294 return make_float64(0x8000000000000000); 295 } else { 296 return float64_zero; 297 } 298 } 299 300 /* Return an infinity with the requested sign */ 301 float32 infinite_float32(uint8_t sign) 302 { 303 if (sign) { 304 return make_float32(SF_MINUS_INF); 305 } else { 306 return make_float32(SF_INF); 307 } 308 } 309 310 /* Return a maximum finite value with the requested sign */ 311 static float64 accum_round_float64(Accum a, float_status *fp_status) 312 { 313 uint64_t ret; 314 315 if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) 316 && ((a.guard | a.round | a.sticky) == 0)) { 317 /* result zero */ 318 switch (fp_status->float_rounding_mode) { 319 case float_round_down: 320 return zero_float64(1); 321 default: 322 return zero_float64(0); 323 } 324 } 325 /* 326 * Normalize right 327 * We want DF_MANTBITS bits of mantissa plus the leading one. 328 * That means that we want DF_MANTBITS+1 bits, or 0x000000000000FF_FFFF 329 * So we need to normalize right while the high word is non-zero and 330 * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 331 */ 332 while ((int128_gethi(a.mant) != 0) || 333 ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0)) { 334 a = accum_norm_right(a, 1); 335 } 336 /* 337 * OK, now normalize left 338 * We want to normalize left until we have a leading one in bit 24 339 * Theoretically, we only need to shift a maximum of one to the left if we 340 * shifted out lots of bits from B, or if we had no shift / 1 shift sticky 341 * should be 0 342 */ 343 while ((int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) == 0) { 344 a = accum_norm_left(a); 345 } 346 /* 347 * OK, now we might need to denormalize because of potential underflow. 348 * We need to do this before rounding, and rounding might make us normal 349 * again 350 */ 351 while (a.exp <= 0) { 352 a = accum_norm_right(a, 1 - a.exp); 353 /* 354 * Do we have underflow? 355 * That's when we get an inexact answer because we ran out of bits 356 * in a denormal. 357 */ 358 if (a.guard || a.round || a.sticky) { 359 float_raise(float_flag_underflow, fp_status); 360 } 361 } 362 /* OK, we're relatively canonical... now we need to round */ 363 if (a.guard || a.round || a.sticky) { 364 float_raise(float_flag_inexact, fp_status); 365 switch (fp_status->float_rounding_mode) { 366 case float_round_to_zero: 367 /* Chop and we're done */ 368 break; 369 case float_round_up: 370 if (a.sign == 0) { 371 a.mant = int128_add(a.mant, int128_one()); 372 } 373 break; 374 case float_round_down: 375 if (a.sign != 0) { 376 a.mant = int128_add(a.mant, int128_one()); 377 } 378 break; 379 default: 380 if (a.round || a.sticky) { 381 /* round up if guard is 1, down if guard is zero */ 382 a.mant = int128_add(a.mant, int128_make64(a.guard)); 383 } else if (a.guard) { 384 /* exactly .5, round up if odd */ 385 a.mant = int128_add(a.mant, int128_and(a.mant, int128_one())); 386 } 387 break; 388 } 389 } 390 /* 391 * OK, now we might have carried all the way up. 392 * So we might need to shr once 393 * at least we know that the lsb should be zero if we rounded and 394 * got a carry out... 395 */ 396 if ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0) { 397 a = accum_norm_right(a, 1); 398 } 399 /* Overflow? */ 400 if (a.exp >= DF_INF_EXP) { 401 /* Yep, inf result */ 402 float_raise(float_flag_overflow, fp_status); 403 float_raise(float_flag_inexact, fp_status); 404 switch (fp_status->float_rounding_mode) { 405 case float_round_to_zero: 406 return maxfinite_float64(a.sign); 407 case float_round_up: 408 if (a.sign == 0) { 409 return infinite_float64(a.sign); 410 } else { 411 return maxfinite_float64(a.sign); 412 } 413 case float_round_down: 414 if (a.sign != 0) { 415 return infinite_float64(a.sign); 416 } else { 417 return maxfinite_float64(a.sign); 418 } 419 default: 420 return infinite_float64(a.sign); 421 } 422 } 423 /* Underflow? */ 424 ret = int128_getlo(a.mant); 425 if (ret & (1ULL << DF_MANTBITS)) { 426 /* Leading one means: No, we're normal. So, we should be done... */ 427 ret = deposit64(ret, 52, 11, a.exp); 428 } else { 429 assert(a.exp == 1); 430 ret = deposit64(ret, 52, 11, 0); 431 } 432 ret = deposit64(ret, 63, 1, a.sign); 433 return ret; 434 } 435 436 float64 internal_mpyhh(float64 a, float64 b, 437 unsigned long long int accumulated, 438 float_status *fp_status) 439 { 440 Accum x; 441 unsigned long long int prod; 442 unsigned int sticky; 443 uint8_t a_sign, b_sign; 444 445 sticky = accumulated & 1; 446 accumulated >>= 1; 447 accum_init(&x); 448 if (float64_is_zero(a) || 449 float64_is_any_nan(a) || 450 float64_is_infinity(a)) { 451 return float64_mul(a, b, fp_status); 452 } 453 if (float64_is_zero(b) || 454 float64_is_any_nan(b) || 455 float64_is_infinity(b)) { 456 return float64_mul(a, b, fp_status); 457 } 458 x.mant = int128_make64(accumulated); 459 x.sticky = sticky; 460 prod = fGETUWORD(1, float64_getmant(a)) * fGETUWORD(1, float64_getmant(b)); 461 x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL)); 462 x.exp = float64_getexp(a) + float64_getexp(b) - DF_BIAS - 20; 463 if (!float64_is_normal(a) || !float64_is_normal(b)) { 464 /* crush to inexact zero */ 465 x.sticky = 1; 466 x.exp = -4096; 467 } 468 a_sign = float64_is_neg(a); 469 b_sign = float64_is_neg(b); 470 x.sign = a_sign ^ b_sign; 471 return accum_round_float64(x, fp_status); 472 } 473