/*
 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include <math.h>
#include "cpu.h"
#include "tcg-cpu.h"
#include "exec/cputlb.h"
#include "accel/tcg/cpu-ldst.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "fpu/softfloat-macros.h"
#include "helper-tcg.h"
#include "access.h"

/* float macros */
#define FT0 (env->ft0)
#define ST0 (env->fpregs[env->fpstt].d)
/* ST(n) indexes the x87 register stack relative to the current TOP */
#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d)
#define ST1 ST(1)

/* Rounding-control field of the x87 control word (bits 11:10) */
#define FPU_RC_SHIFT 10
#define FPU_RC_MASK (3 << FPU_RC_SHIFT)
#define FPU_RC_NEAR 0x000
#define FPU_RC_DOWN 0x400
#define FPU_RC_UP 0x800
#define FPU_RC_CHOP 0xc00

#define MAXTAN 9223372036854775808.0

/* the following deal with x86 long double-precision numbers */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp) (fp.l.upper & 0x7fff)
#define SIGND(fp) ((fp.l.upper) & 0x8000)
#define MANTD(fp) (fp.l.lower)
#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS

/* x87 status-word bits: exception flags, stack fault, summary, busy */
#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B (1 << 15)

/* Exception-mask bits of the x87 control word */
#define FPUC_EM 0x3f

/*
 * Extended-precision constants; the _d/_u variants are the same value
 * rounded down/up by one ulp, selected according to the rounding mode.
 */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)

/* Push on the x87 register stack: decrement TOP and mark it valid */
static inline void fpush(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fptags[env->fpstt] = 0; /* validate stack entry */
}

/* Pop the x87 register stack: mark the top entry empty, increment TOP */
static inline void fpop(CPUX86State *env)
{
    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
    env->fpstt = (env->fpstt + 1) & 7;
}

/* Load an 80-bit extended-precision value from guest memory */
static floatx80 do_fldt(X86Access *ac, target_ulong ptr)
{
    CPU_LDoubleU temp;

    temp.l.lower = access_ldq(ac, ptr);
    temp.l.upper = access_ldw(ac, ptr + 8);
    return temp.d;
}

/* Store an 80-bit extended-precision value to guest memory */
static void do_fstt(X86Access *ac, target_ulong ptr, floatx80 f)
{
    CPU_LDoubleU temp;

    temp.d = f;
    access_stq(ac, ptr, temp.l.lower);
    access_stw(ac, ptr + 8, temp.l.upper);
}

/* x87 FPU helpers */

/* Convert a floatx80 to a host double, type-punning through a union */
static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.f64 = floatx80_to_float64(a, &env->fp_status);
    return u.d;
}

/* Convert a host double to a floatx80, type-punning through a union */
static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.d = a;
    return float64_to_floatx80(u.f64, &env->fp_status);
}

/*
 * Raise the status-word exception bits in 'mask'; if any set exception
 * is unmasked in the control word, also set the summary and busy bits.
 */
static void fpu_set_exception(CPUX86State *env, int mask)
{
    env->fpus |= mask;
    if (env->fpus & (~env->fpuc & FPUC_EM)) {
        env->fpus |= FPUS_SE | FPUS_B;
    }
}

void cpu_init_fp_statuses(CPUX86State *env)
{
    /*
     * Initialise the non-runtime-varying fields of the various
     * float_status words to x86 behaviour. This must be called at
     * CPU reset because the float_status words are in the
     * "zeroed on reset" portion of the CPU state struct.
     * Fields in float_status that vary under guest control are set
     * via the codepath for setting that register, eg cpu_set_fpuc().
     */
    /*
     * Use x87 NaN propagation rules:
     * SNaN + QNaN => return the QNaN
     * two SNaNs => return the one with the larger significand, silenced
     * two QNaNs => return the one with the larger significand
     * SNaN and a non-NaN => return the SNaN, silenced
     * QNaN and a non-NaN => return the QNaN
     *
     * If we get down to comparing significands and they are the same,
     * return the NaN with the positive sign bit (if any).
     */
    set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
    /*
     * TODO: These are incorrect: the x86 Software Developer's Manual vol 1
     * section 4.8.3.5 "Operating on SNaNs and QNaNs" says that the
     * "larger significand" behaviour is only used for x87 FPU operations.
     * For SSE the required behaviour is to always return the first NaN,
     * which is float_2nan_prop_ab.
     *
     * mmx_status is used only for the AMD 3DNow! instructions, which
     * are documented in the "3DNow! Technology Manual" as not supporting
     * NaNs or infinities as inputs. The result of passing two NaNs is
     * documented as "undefined", so we can do what we choose.
     * (Strictly there is some behaviour we don't implement correctly
     * for these "unsupported" NaN and Inf values, like "NaN * 0 == 0".)
     */
    set_float_2nan_prop_rule(float_2nan_prop_x87, &env->mmx_status);
    set_float_2nan_prop_rule(float_2nan_prop_x87, &env->sse_status);
    /*
     * Only SSE has multiply-add instructions. In the SDM Section 14.5.2
     * "Fused-Multiply-ADD (FMA) Numeric Behavior" the NaN handling is
     * specified -- for 0 * inf + NaN the input NaN is selected, and if
     * there are multiple input NaNs they are selected in the order a, b, c.
     * We also do not raise Invalid for the 0 * inf + (Q)NaN case.
     */
    set_float_infzeronan_rule(float_infzeronan_dnan_never |
                              float_infzeronan_suppress_invalid,
                              &env->sse_status);
    set_float_3nan_prop_rule(float_3nan_prop_abc, &env->sse_status);
    /* Default NaN: sign bit set, most significant frac bit set */
    set_float_default_nan_pattern(0b11000000, &env->fp_status);
    set_float_default_nan_pattern(0b11000000, &env->mmx_status);
    set_float_default_nan_pattern(0b11000000, &env->sse_status);
    /*
     * TODO: x86 does flush-to-zero detection after rounding (the SDM
     * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush
     * when we detect underflow, which x86 does after rounding).
     */
    set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
    set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status);
    set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status);
}

/* Clear the softfloat exception flags, returning the previous set */
static inline uint8_t save_exception_flags(CPUX86State *env)
{
    uint8_t old_flags = get_float_exception_flags(&env->fp_status);
    set_float_exception_flags(0, &env->fp_status);
    return old_flags;
}

/*
 * Fold the softfloat exception flags accumulated since the matching
 * save_exception_flags() into the x87 status word, and restore the
 * previously saved softfloat flags.
 */
static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
{
    uint8_t new_flags = get_float_exception_flags(&env->fp_status);
    float_raise(old_flags, &env->fp_status);
    fpu_set_exception(env,
                      ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
                       (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
                       (new_flags & float_flag_overflow ? FPUS_OE : 0) |
                       (new_flags & float_flag_underflow ? FPUS_UE : 0) |
                       (new_flags & float_flag_inexact ? FPUS_PE : 0) |
                       (new_flags & float_flag_input_denormal_flushed ?
                        FPUS_DE : 0)));
}

/* Division with x87 exception-flag accounting */
static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
{
    uint8_t old_flags = save_exception_flags(env);
    floatx80 ret = floatx80_div(a, b, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return ret;
}

/*
 * Deliver a pending x87 exception: #MF when CR0.NE is set, otherwise
 * (system emulation only) via the legacy FERR# interrupt path.
 */
static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
{
    if (env->cr[0] & CR0_NE_MASK) {
        raise_exception_ra(env, EXCP10_COPR, retaddr);
    }
#if !defined(CONFIG_USER_ONLY)
    else {
        fpu_check_raise_ferr_irq(env);
    }
#endif
}

/* Load a 32-bit float bit pattern into the FT0 scratch register */
void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.i = val;
    FT0 = float32_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* Load a 64-bit double bit pattern into the FT0 scratch register */
void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.i = val;
    FT0 = float64_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* Load a 32-bit integer into FT0 (no exception flags merged) */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}

/* FLD m32: push a 32-bit float converted to extended precision */
void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float32 f;
        uint32_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

/* FLD m64: push a 64-bit double converted to extended precision */
void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float64 f;
        uint64_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

/* Temporarily force full 64-bit significand precision; return the old one */
static FloatX80RoundPrec tmp_maximise_precision(float_status *st)
{
    FloatX80RoundPrec old = get_floatx80_rounding_precision(st);
    set_floatx80_rounding_precision(floatx80_precision_x, st);
    return old;
}

/* FILD m32: push an int32 converted at full precision */
void helper_fildl_ST0(CPUX86State *env, int32_t val)
{
    int new_fpstt;
    FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */

    set_floatx80_rounding_precision(old, &env->fp_status);
}

/* FILD m64: push an int64 converted at full precision */
void helper_fildll_ST0(CPUX86State *env, int64_t val)
{
    int new_fpstt;
    FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */

    set_floatx80_rounding_precision(old, &env->fp_status);
}

/* FST m32: convert ST0 to a 32-bit float bit pattern */
uint32_t helper_fsts_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.f = floatx80_to_float32(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}

/* FST m64: convert ST0 to a 64-bit double bit pattern */
uint64_t helper_fstl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.f = floatx80_to_float64(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}

/*
 * FIST m16: round ST0 to integer; results outside int16 range raise
 * Invalid and yield the 16-bit indefinite value -32768.
 */
int32_t helper_fist_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FIST m32: invalid conversions yield the indefinite value 0x80000000 */
int32_t helper_fistl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FIST m64: invalid conversions yield the indefinite value INT64_MIN */
int64_t helper_fistll_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTT m16: truncating conversion, same clamping as helper_fist_ST0 */
int32_t helper_fistt_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTT m32: truncating conversion, same clamping as helper_fistl_ST0 */
int32_t helper_fisttl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTT m64: truncating conversion, same clamping as helper_fistll_ST0 */
int64_t helper_fisttll_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FLD m80: push an 80-bit value loaded from guest memory */
void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
    int new_fpstt;
    X86Access ac;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = do_fldt(&ac, ptr);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FST m80: store ST0 as an 80-bit value to guest memory */
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
    do_fstt(&ac, ptr, ST0);
}

void helper_fpush(CPUX86State *env)
{
    fpush(env);
}

void helper_fpop(CPUX86State *env)
{
    fpop(env);
}

/* FDECSTP: decrement TOP; clears the C0..C3 condition bits (0x4700) */
void helper_fdecstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fpus &= ~0x4700;
}

/* FINCSTP: increment TOP; clears the C0..C3 condition bits (0x4700) */
void helper_fincstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt + 1) & 7;
    env->fpus &= ~0x4700;
}

/* FPU move */

/* FFREE: tag ST(st_index) as empty */
void helper_ffree_STN(CPUX86State *env, int st_index)
{
    env->fptags[(env->fpstt + st_index) & 7] = 1;
}

void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}

void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}

void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}

void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}

/* FXCH: exchange ST0 with ST(st_index) */
void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 tmp;

    tmp = ST(st_index);
    ST(st_index) = ST0;
    ST0 = tmp;
}

/* FPU operations */

/* C3/C2/C0 status-word patterns indexed by FloatRelation + 1 */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};

/* FCOM: compare ST0 with FT0, setting C3/C2/C0 in the status word */
void helper_fcom_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    env->fpus
        = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

/* FUCOM: as FCOM, but using the quiet comparison variant */
void helper_fucom_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

/* EFLAGS (CF/ZF/PF) patterns indexed by FloatRelation + 1 */
static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};

/* FCOMI: compare ST0 with FT0, setting ZF/PF/CF in EFLAGS */
void helper_fcomi_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C);
    CC_SRC = eflags | fcomi_ccval[ret + 1];
    CC_OP = CC_OP_EFLAGS;
    merge_exception_flags(env, old_flags);
}

/* FUCOMI: as FCOMI, but using the quiet comparison variant */
void helper_fucomi_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C);
    CC_SRC = eflags | fcomi_ccval[ret + 1];
    CC_OP = CC_OP_EFLAGS;
    merge_exception_flags(env, old_flags);
}

void helper_fadd_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fmul_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsub_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FSUBR: reversed subtraction, ST0 = FT0 - ST0 */
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}

/* FDIVR: reversed division, ST0 = FT0 / ST0 */
void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}

/* fp operations between STN and ST0 */

void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FSUBR: reversed subtraction, ST(n) = ST0 - ST(n) */
void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, *p, ST0);
}

/* FDIVR: reversed division, ST(n) = ST0 / ST(n) */
void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, ST0, *p);
}

/* misc FPU operations */

/* FCHS: flip the sign of ST0 (sign-bit operation only) */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}

/* FABS: clear the sign of ST0 (sign-bit operation only) */
void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}

void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}

/* FLDL2T: load log2(10), using the up-rounded constant when RC is up */
void helper_fldl2t_ST0(CPUX86State *env)
{
    switch (env->fpuc &
            FPU_RC_MASK) {
    case FPU_RC_UP:
        ST0 = floatx80_l2t_u;
        break;
    default:
        ST0 = floatx80_l2t;
        break;
    }
}

/* FLDL2E: load log2(e), using the down-rounded constant for RC down/chop */
void helper_fldl2e_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_l2e_d;
        break;
    default:
        ST0 = floatx80_l2e;
        break;
    }
}

/* FLDPI: load pi, using the down-rounded constant for RC down/chop */
void helper_fldpi_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_pi_d;
        break;
    default:
        ST0 = floatx80_pi;
        break;
    }
}

/* FLDLG2: load log10(2), using the down-rounded constant for RC down/chop */
void helper_fldlg2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_lg2_d;
        break;
    default:
        ST0 = floatx80_lg2;
        break;
    }
}

/* FLDLN2: load ln(2), using the down-rounded constant for RC down/chop */
void helper_fldln2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_ln2_d;
        break;
    default:
        ST0 = floatx80_ln2;
        break;
    }
}

void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}

void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}

/* FNSTSW: status word with the current TOP inserted in bits 13..11 */
uint32_t helper_fnstsw(CPUX86State *env)
{
    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
}

/* FNSTCW: read the control word */
uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}

/* Map an x86 2-bit rounding-control value onto a softfloat rounding mode */
static void set_x86_rounding_mode(unsigned mode, float_status *status)
{
    static FloatRoundMode x86_round_mode[4] = {
        float_round_nearest_even,
        float_round_down,
        float_round_up,
        float_round_to_zero
    };
    assert(mode < ARRAY_SIZE(x86_round_mode));
    set_float_rounding_mode(x86_round_mode[mode], status);
}

/*
 * Propagate the rounding-control and precision-control fields of the
 * x87 control word into fp_status.
 */
void update_fp_status(CPUX86State *env)
{
    int rnd_mode;
    FloatX80RoundPrec rnd_prec;

    /* set rounding mode */
    rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT;
    set_x86_rounding_mode(rnd_mode, &env->fp_status);

    /* precision control: bits 9..8 of the control word */
    switch ((env->fpuc >> 8) & 3) {
    case 0:
        rnd_prec = floatx80_precision_s;
        break;
    case 2:
        rnd_prec = floatx80_precision_d;
        break;
    case 3:
    default:
        rnd_prec = floatx80_precision_x;
        break;
    }
    set_floatx80_rounding_precision(rnd_prec, &env->fp_status);
}

/* FLDCW: write the control word */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}

/* FNCLEX: clear the exception flags, summary and busy bits */
void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}

/* FWAIT: deliver any pending x87 exception (summary bit set) */
void helper_fwait(CPUX86State *env)
{
    if (env->fpus & FPUS_SE) {
        fpu_raise_exception(env, GETPC());
    }
}

/* FNINIT: reset the FPU state, tagging all stack registers empty */
static void do_fninit(CPUX86State *env)
{
    env->fpus = 0;
    env->fpstt = 0;
    env->fpcs = 0;
    env->fpds = 0;
    env->fpip = 0;
    env->fpdp = 0;
    cpu_set_fpuc(env, 0x37f);
    env->fptags[0] = 1;
    env->fptags[1] = 1;
    env->fptags[2] = 1;
    env->fptags[3] = 1;
    env->fptags[4] = 1;
    env->fptags[5] = 1;
    env->fptags[6] = 1;
    env->fptags[7] = 1;
}

void helper_fninit(CPUX86State *env)
{
    do_fninit(env);
}

/* BCD ops */

/* FBLD: load an 18-digit packed-BCD value (sign in byte 9) and push it */
void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;
    floatx80 tmp;
    uint64_t val;
    unsigned int v;
    int i;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());

    /* Accumulate two decimal digits per byte, most significant byte first */
    val = 0;
    for (i = 8; i >= 0; i--) {
        v = access_ldb(&ac, ptr + i);
        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    }
    tmp = int64_to_floatx80(val, &env->fp_status);
    if (access_ldb(&ac, ptr + 9) & 0x80) {
        tmp = floatx80_chs(tmp);
    }
    fpush(env);
    ST0 = tmp;
}

/* FBST: store ST0 as an 18-digit packed-BCD value */
void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
    uint8_t old_flags = save_exception_flags(env);
    int v;
    target_ulong mem_ref, mem_end;
    int64_t val;
    CPU_LDoubleU temp;
    X86Access ac;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
    temp.d = ST0;

    val = floatx80_to_int64(ST0, &env->fp_status);
    mem_ref = ptr;
    if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
        /* Out of 18-digit BCD range: store the BCD indefinite, raise IE */
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        while (mem_ref < ptr + 7) {
            access_stb(&ac, mem_ref++, 0);
        }
        access_stb(&ac, mem_ref++, 0xc0);
        access_stb(&ac, mem_ref++, 0xff);
        access_stb(&ac, mem_ref++, 0xff);
        merge_exception_flags(env, old_flags);
        return;
    }
    mem_end = mem_ref + 9;
    /* Byte 9 carries the sign; the magnitude is stored as |val| */
    if (SIGND(temp)) {
        access_stb(&ac, mem_end, 0x80);
        val = -val;
    } else {
        access_stb(&ac, mem_end, 0x00);
    }
    /* Emit two decimal digits per byte, least significant first */
    while (mem_ref < mem_end) {
        if (val == 0) {
            break;
        }
        v = val % 100;
        val = val / 100;
        v = ((v / 10) << 4) | (v % 10);
        access_stb(&ac, mem_ref++, v);
    }
    /* Zero-fill any remaining digit bytes */
    while (mem_ref < mem_end) {
        access_stb(&ac, mem_ref++, 0);
    }
    merge_exception_flags(env, old_flags);
}

/* 128-bit significand of log(2). */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL

/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
916 */ 917 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL) 918 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL) 919 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL) 920 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL) 921 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL) 922 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL) 923 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL) 924 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL) 925 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL) 926 927 struct f2xm1_data { 928 /* 929 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1 930 * are very close to exact floatx80 values. 931 */ 932 floatx80 t; 933 /* The value of 2^t. */ 934 floatx80 exp2; 935 /* The value of 2^t - 1. */ 936 floatx80 exp2m1; 937 }; 938 939 static const struct f2xm1_data f2xm1_table[65] = { 940 { make_floatx80_init(0xbfff, 0x8000000000000000ULL), 941 make_floatx80_init(0x3ffe, 0x8000000000000000ULL), 942 make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, 943 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), 944 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), 945 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, 946 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), 947 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), 948 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, 949 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), 950 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), 951 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, 952 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), 953 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), 954 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, 955 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), 956 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), 957 make_floatx80_init(0xbffd, 
0xe2b78ce97464fdd4ULL) }, 958 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), 959 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), 960 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, 961 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), 962 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), 963 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, 964 { make_floatx80_init(0xbffe, 0xc000000000006530ULL), 965 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), 966 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, 967 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), 968 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), 969 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, 970 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), 971 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), 972 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, 973 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), 974 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), 975 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, 976 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), 977 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), 978 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, 979 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), 980 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), 981 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, 982 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), 983 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), 984 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, 985 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), 986 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), 987 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, 988 { make_floatx80_init(0xbffe, 0x800000000000227dULL), 989 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), 990 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, 991 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), 992 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), 
993 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, 994 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), 995 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), 996 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, 997 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), 998 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), 999 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, 1000 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), 1001 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), 1002 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, 1003 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL), 1004 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), 1005 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, 1006 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), 1007 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), 1008 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, 1009 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), 1010 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL), 1011 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, 1012 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), 1013 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), 1014 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, 1015 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), 1016 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), 1017 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, 1018 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), 1019 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), 1020 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, 1021 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), 1022 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), 1023 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, 1024 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), 1025 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), 1026 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, 1027 { make_floatx80_init(0xbffb, 
0xbfffffffffff2d18ULL), 1028 make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), 1029 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, 1030 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), 1031 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), 1032 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, 1033 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), 1034 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), 1035 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, 1036 { floatx80_zero_init, 1037 make_floatx80_init(0x3fff, 0x8000000000000000ULL), 1038 floatx80_zero_init }, 1039 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), 1040 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), 1041 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, 1042 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), 1043 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), 1044 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) }, 1045 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), 1046 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), 1047 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, 1048 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), 1049 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), 1050 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, 1051 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), 1052 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), 1053 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, 1054 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), 1055 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), 1056 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, 1057 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), 1058 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), 1059 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, 1060 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), 1061 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), 1062 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, 1063 { 
make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), 1064 make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL), 1065 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, 1066 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), 1067 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), 1068 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, 1069 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), 1070 make_floatx80_init(0x3fff, 0xa27043030c49370aULL), 1071 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, 1072 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), 1073 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), 1074 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, 1075 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), 1076 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), 1077 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, 1078 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), 1079 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), 1080 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, 1081 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), 1082 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), 1083 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, 1084 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL), 1085 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), 1086 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, 1087 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), 1088 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), 1089 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, 1090 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), 1091 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), 1092 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, 1093 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), 1094 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), 1095 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, 1096 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), 1097 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), 
1098 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) }, 1099 { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL), 1100 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), 1101 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, 1102 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), 1103 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), 1104 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, 1105 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), 1106 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), 1107 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, 1108 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), 1109 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), 1110 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, 1111 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), 1112 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), 1113 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, 1114 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), 1115 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL), 1116 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) }, 1117 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL), 1118 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL), 1119 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) }, 1120 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL), 1121 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL), 1122 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) }, 1123 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL), 1124 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL), 1125 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) }, 1126 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL), 1127 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL), 1128 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) }, 1129 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL), 1130 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL), 1131 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) }, 1132 { make_floatx80_init(0x3fff, 
0x8000000000000000ULL),
      make_floatx80_init(0x4000, 0x8000000000000000ULL),
      make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
};

/*
 * Emulate the x87 F2XM1 instruction: ST0 = 2^ST0 - 1, for ST0 in
 * [-1, +1].  Arguments outside that range, and invalid encodings,
 * produce the default NaN with the invalid exception raised.  The
 * computation uses the tables and coefficients above: the argument is
 * split as t + y where t = n/32 is the nearest multiple of 1/32, a
 * polynomial approximates (2^y - 1) / y, and precomputed 2^t and
 * 2^t - 1 values combine the pieces in 128-bit significand arithmetic.
 */
void helper_f2xm1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t sig = extractFloatx80Frac(ST0);
    int32_t exp = extractFloatx80Exp(ST0);
    bool sign = extractFloatx80Sign(ST0);

    if (floatx80_invalid_encoding(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        /* Quiet NaNs pass through; signaling NaNs are silenced.  */
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (exp > 0x3fff ||
               (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
        /* Out of range for the instruction, treat as invalid.  */
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (exp == 0x3fff) {
        /*
         * Argument 1 or -1, exact result 1 or -0.5.  (For +1 the
         * result 2^1 - 1 == 1 equals the argument, so ST0 is left
         * unchanged; only the -1 case needs a store.)
         */
        if (sign) {
            ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
        }
    } else if (exp < 0x3fb0) {
        if (!floatx80_is_zero(ST0)) {
            /*
             * Multiplying the argument by an extra-precision version
             * of log(2) is sufficiently precise.  Zero arguments are
             * returned unchanged.
             */
            uint64_t sig0, sig1, sig2;
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
                            &sig2);
            /* This result is inexact.  */
            sig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig0, sig1,
                                                &env->fp_status);
        }
    } else {
        floatx80 tmp, y, accum;
        bool asign, bsign;
        int32_t n, aexp, bexp;
        uint64_t asig0, asig1, asig2, bsig0, bsig1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        /*
         * Intermediate steps are done at full extended precision with
         * round-to-nearest; the caller's mode/precision are restored
         * before packing the final result.
         */
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        /* Find the nearest multiple of 1/32 to the argument.  */
        tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
        /* n indexes f2xm1_table; 32 corresponds to t == 0.  */
        n = 32 + floatx80_to_int32(tmp, &env->fp_status);
        y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);

        if (floatx80_is_zero(y)) {
            /*
             * Use the value of 2^t - 1 from the table, to avoid
             * needing to special-case zero as a result of
             * multiplication below.
             */
            ST0 = f2xm1_table[n].t;
            set_float_exception_flags(float_flag_inexact, &env->fp_status);
            env->fp_status.float_rounding_mode = save_mode;
        } else {
            /*
             * Compute the lower parts of a polynomial expansion for
             * (2^y - 1) / y.
             */
            accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);

            /*
             * The full polynomial expansion is f2xm1_coeff_0 + accum
             * (where accum has much lower magnitude, and so, in
             * particular, carry out of the addition is not possible).
             * (This expansion is only accurate to about 70 bits, not
             * 128 bits.)
             */
            aexp = extractFloatx80Exp(f2xm1_coeff_0);
            asign = extractFloatx80Sign(f2xm1_coeff_0);
            shift128RightJamming(extractFloatx80Frac(accum), 0,
                                 aexp - extractFloatx80Exp(accum),
                                 &asig0, &asig1);
            bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
            bsig1 = 0;
            if (asign == extractFloatx80Sign(accum)) {
                add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            } else {
                sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            }
            /* And thus compute an approximation to 2^y - 1.  */
            mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
                            &asig0, &asig1, &asig2);
            aexp += extractFloatx80Exp(y) - 0x3ffe;
            asign ^= extractFloatx80Sign(y);
            if (n != 32) {
                /*
                 * Multiply this by the precomputed value of 2^t and
                 * add that of 2^t - 1.
                 */
                mul128By64To192(asig0, asig1,
                                extractFloatx80Frac(f2xm1_table[n].exp2),
                                &asig0, &asig1, &asig2);
                aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
                bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
                bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
                bsig1 = 0;
                /* Align the two significands to a common exponent.  */
                if (bexp < aexp) {
                    shift128RightJamming(bsig0, bsig1, aexp - bexp,
                                         &bsig0, &bsig1);
                } else if (aexp < bexp) {
                    shift128RightJamming(asig0, asig1, bexp - aexp,
                                         &asig0, &asig1);
                    aexp = bexp;
                }
                /* The sign of 2^t - 1 is always that of the result.  */
                bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
                if (asign == bsign) {
                    /* Avoid possible carry out of the addition.  */
                    shift128RightJamming(asig0, asig1, 1,
                                         &asig0, &asig1);
                    shift128RightJamming(bsig0, bsig1, 1,
                                         &bsig0, &bsig1);
                    ++aexp;
                    add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
                } else {
                    sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
                    asign = bsign;
                }
            }
            env->fp_status.float_rounding_mode = save_mode;
            /* This result is inexact.  */
            asig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                asign, aexp, asig0, asig1,
                                                &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    merge_exception_flags(env, old_flags);
}

/*
 * Emulate the x87 FPTAN instruction using the host's double-precision
 * tan(); precision is therefore limited to that of a host double.  On
 * success, ST0 = tan(ST0), 1.0 is pushed, and C2 is cleared; if the
 * argument's magnitude exceeds MAXTAN, only C2 is set and the stack is
 * left unchanged (argument out of range for reduction).
 */
void helper_fptan(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;   /* C2 <-- 1: no partial tangent computed */
    } else {
        fptemp = tan(fptemp);
        ST0 = double_to_floatx80(env, fptemp);
        fpush(env);
        ST0 = floatx80_one;
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**52 only */
    }
}

/* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.
*/
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL

/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)

struct fpatan_data {
    /* High and low parts of atan(x).  */
    floatx80 atan_high, atan_low;
};

/* atan(n/8) for n in 0..8, as high + low extended-precision parts.  */
static const struct fpatan_data fpatan_table[9] = {
    { floatx80_zero_init,
      floatx80_zero_init },
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};

/*
 * Emulate the x87 FPATAN instruction: ST1 = arctan(ST1 / ST0), using
 * the signs of both operands to place the result in the correct
 * quadrant (as for atan2), then pop the stack.  NaN, infinity, zero
 * and invalid-encoding operands are dispatched first; the general
 * finite case reduces the quotient x to t + y with t = n/8, uses
 * z = y/(1+tx) so that arctan(x) = arctan(t) + arctan(z), evaluates
 * arctan(z) by the polynomial above, and looks up arctan(t) in
 * fpatan_table, carrying 128-bit significands throughout.
 */
void helper_fpatan(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0, &env->fp_status) ||
               floatx80_invalid_encoding(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through.  */
    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
        /* Pass this zero through.  */
    } else if (((floatx80_is_infinity(ST0, &env->fp_status) &&
                 !floatx80_is_infinity(ST1, &env->fp_status)) ||
                arg0_exp - arg1_exp >= 80) &&
               !arg0_sign) {
        /*
         * Dividing ST1 by ST0 gives the correct result up to
         * rounding, and avoids spurious underflow exceptions that
         * might result from passing some small values through the
         * polynomial approximation, but if a finite nonzero result of
         * division is exact, the result of fpatan is still inexact
         * (and underflowing where appropriate).
         */
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
        if (!floatx80_is_zero(ST1) &&
            !(get_float_exception_flags(&env->fp_status) &
              float_flag_inexact)) {
            /*
             * The mathematical result is very slightly closer to zero
             * than this exact result.  Round a value with the
             * significand adjusted accordingly to get the correct
             * exceptions, and possibly an adjusted result depending
             * on the rounding mode.
             */
            uint64_t sig = extractFloatx80Frac(ST1);
            int32_t exp = extractFloatx80Exp(ST1);
            bool sign = extractFloatx80Sign(ST1);
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig - 1,
                                                -1, &env->fp_status);
        }
    } else {
        /* The result is inexact.  */
        bool rsign = arg1_sign;
        int32_t rexp;
        uint64_t rsig0, rsig1;
        if (floatx80_is_zero(ST1)) {
            /*
             * ST0 is negative.  The result is pi with the sign of
             * ST1.
             */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else if (floatx80_is_infinity(ST1, &env->fp_status)) {
            if (floatx80_is_infinity(ST0, &env->fp_status)) {
                if (arg0_sign) {
                    rexp = pi_34_exp;
                    rsig0 = pi_34_sig_high;
                    rsig1 = pi_34_sig_low;
                } else {
                    rexp = pi_4_exp;
                    rsig0 = pi_4_sig_high;
                    rsig1 = pi_4_sig_low;
                }
            } else {
                rexp = pi_2_exp;
                rsig0 = pi_2_sig_high;
                rsig1 = pi_2_sig_low;
            }
        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
            rexp = pi_2_exp;
            rsig0 = pi_2_sig_high;
            rsig1 = pi_2_sig_low;
        } else if (floatx80_is_infinity(ST0, &env->fp_status) ||
                   arg0_exp - arg1_exp >= 80) {
            /* ST0 is negative.  */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else {
            /*
             * ST0 and ST1 are finite, nonzero and with exponents not
             * too far apart.
             */
            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
            int32_t azexp, axexp;
            bool adj_sub, ysign, zsign;
            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
            uint64_t azsig0, azsig1;
            uint64_t azsig2, azsig3, axsig0, axsig1;
            floatx80 x8;
            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
            FloatX80RoundPrec save_prec =
                env->fp_status.floatx80_rounding_precision;
            /* Intermediate work at full precision, round-to-nearest.  */
            env->fp_status.float_rounding_mode = float_round_nearest_even;
            env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

            if (arg0_exp == 0) {
                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
            }
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            /* Choose the division so the quotient x satisfies x <= 1.  */
            if (arg0_exp > arg1_exp ||
                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
                /* Work with abs(ST1) / abs(ST0).  */
                num_exp = arg1_exp;
                num_sig = arg1_sig;
                den_exp = arg0_exp;
                den_sig = arg0_sig;
                if (arg0_sign) {
                    /* The result is subtracted from pi.  */
                    adj_exp = pi_exp;
                    adj_sig0 = pi_sig_high;
                    adj_sig1 = pi_sig_low;
                    adj_sub = true;
                } else {
                    /* The result is used as-is.  */
                    adj_exp = 0;
                    adj_sig0 = 0;
                    adj_sig1 = 0;
                    adj_sub = false;
                }
            } else {
                /* Work with abs(ST0) / abs(ST1).  */
                num_exp = arg0_exp;
                num_sig = arg0_sig;
                den_exp = arg1_exp;
                den_sig = arg1_sig;
                /* The result is added to or subtracted from pi/2.  */
                adj_exp = pi_2_exp;
                adj_sig0 = pi_2_sig_high;
                adj_sig1 = pi_2_sig_low;
                adj_sub = !arg0_sign;
            }

            /*
             * Compute x = num/den, where 0 < x <= 1 and x is not too
             * small.
             */
            xexp = num_exp - den_exp + 0x3ffe;
            remsig0 = num_sig;
            remsig1 = 0;
            if (den_sig <= remsig0) {
                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                ++xexp;
            }
            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
            mul64To128(den_sig, xsig0, &msig0, &msig1);
            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
            /* Correct any overestimate of the quotient digit.  */
            while ((int64_t) remsig0 < 0) {
                --xsig0;
                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
            }
            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
            /*
             * No need to correct any estimation error in xsig1; even
             * with such error, it is accurate enough.
             */

            /*
             * Split x as x = t + y, where t = n/8 is the nearest
             * multiple of 1/8 to x.
             */
            x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                               false, xexp + 3, xsig0,
                                               xsig1, &env->fp_status);
            n = floatx80_to_int32(x8, &env->fp_status);
            if (n == 0) {
                /* t == 0, so y is just x.  */
                ysign = false;
                yexp = xexp;
                ysig0 = xsig0;
                ysig1 = xsig1;
                texp = 0;
                tsig = 0;
            } else {
                int shift = clz32(n) + 32;
                texp = 0x403b - shift;
                tsig = n;
                tsig <<= shift;
                if (texp == xexp) {
                    /* Same exponent: subtract directly, then normalize.  */
                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
                    if ((int64_t) ysig0 >= 0) {
                        ysign = false;
                        if (ysig0 == 0) {
                            if (ysig1 == 0) {
                                /* y is exactly zero.  */
                                yexp = 0;
                            } else {
                                shift = clz64(ysig1) + 64;
                                yexp = xexp - shift;
                                shift128Left(ysig0, ysig1, shift,
                                             &ysig0, &ysig1);
                            }
                        } else {
                            shift = clz64(ysig0);
                            yexp = xexp - shift;
                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                        }
                    } else {
                        ysign = true;
                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
                        if (ysig0 == 0) {
                            shift = clz64(ysig1) + 64;
                        } else {
                            shift = clz64(ysig0);
                        }
                        yexp = xexp - shift;
                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                    }
                } else {
                    /*
                     * t's exponent must be greater than x's because t
                     * is positive and the nearest multiple of 1/8 to
                     * x, and if x has a greater exponent, the power
                     * of 2 with that exponent is also a multiple of
                     * 1/8.
                     */
                    uint64_t usig0, usig1;
                    shift128RightJamming(xsig0, xsig1, texp - xexp,
                                         &usig0, &usig1);
                    ysign = true;
                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
                    if (ysig0 == 0) {
                        shift = clz64(ysig1) + 64;
                    } else {
                        shift = clz64(ysig0);
                    }
                    yexp = texp - shift;
                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                }
            }

            /*
             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
             * arctan(z).
             */
            zsign = ysign;
            if (texp == 0 || yexp == 0) {
                /* t or y is zero; the denominator 1+tx reduces to 1.  */
                zexp = yexp;
                zsig0 = ysig0;
                zsig1 = ysig1;
            } else {
                /*
                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
                 */
                int32_t dexp = texp + xexp - 0x3ffe;
                uint64_t dsig0, dsig1, dsig2;
                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
                /*
                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
                 * bit).  Add 1 to produce the denominator 1+tx.
                 */
                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
                                     &dsig0, &dsig1);
                dsig0 |= 0x8000000000000000ULL;
                zexp = yexp - 1;
                remsig0 = ysig0;
                remsig1 = ysig1;
                remsig2 = 0;
                if (dsig0 <= remsig0) {
                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                    ++zexp;
                }
                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
                       &remsig0, &remsig1, &remsig2);
                /* Correct any overestimate of the quotient digit.  */
                while ((int64_t) remsig0 < 0) {
                    --zsig0;
                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
                           &remsig0, &remsig1, &remsig2);
                }
                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
                /* No need to correct any estimation error in zsig1.  */
            }

            if (zexp == 0) {
                /* z (and thus arctan(z)) is zero.  */
                azexp = 0;
                azsig0 = 0;
                azsig1 = 0;
            } else {
                floatx80 z2, accum;
                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
                /* Compute z^2.  */
                mul128To256(zsig0, zsig1, zsig0, zsig1,
                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
                z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                                   zexp + zexp - 0x3ffe,
                                                   z2sig0, z2sig1,
                                                   &env->fp_status);

                /* Compute the lower parts of the polynomial expansion.  */
                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);

                /*
                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
                 */
                aexp = extractFloatx80Exp(fpatan_coeff_0);
                shift128RightJamming(extractFloatx80Frac(accum), 0,
                                     aexp - extractFloatx80Exp(accum),
                                     &asig0, &asig1);
                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
                       &asig0, &asig1);
                /* Multiply by z to compute arctan(z).  */
                azexp = aexp + zexp - 0x3ffe;
                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
                            &azsig2, &azsig3);
            }

            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign).  */
            if (texp == 0) {
                /* z is positive.  */
                axexp = azexp;
                axsig0 = azsig0;
                axsig1 = azsig1;
            } else {
                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
                uint64_t low_sig0 =
                    extractFloatx80Frac(fpatan_table[n].atan_low);
                uint64_t low_sig1 = 0;
                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
                axsig1 = 0;
                /* Fold the table's low part into the high part.  */
                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
                                     &low_sig0, &low_sig1);
                if (low_sign) {
                    sub128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                }
                /* Align exponents (shifting by 1 extra to avoid carry out).  */
                if (azexp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
                                         &axsig0, &axsig1);
                    axexp = azexp + 1;
                    shift128RightJamming(azsig0, azsig1, 1,
                                         &azsig0, &azsig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
                                         &azsig0, &azsig1);
                    ++axexp;
                }
                if (zsign) {
                    sub128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                }
            }

            if (adj_exp == 0) {
                rexp = axexp;
                rsig0 = axsig0;
                rsig1 = axsig1;
            } else {
                /*
                 * Add or subtract arctan(x) (exponent axexp,
                 * significand axsig0 and axsig1, positive, not
                 * necessarily normalized) to the number given by
                 * adj_exp, adj_sig0 and adj_sig1, according to
                 * adj_sub.
                 */
                if (adj_exp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
                                         &axsig0, &axsig1);
                    rexp = adj_exp + 1;
                    shift128RightJamming(adj_sig0, adj_sig1, 1,
                                         &adj_sig0, &adj_sig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(adj_sig0, adj_sig1,
                                         axexp - adj_exp + 1,
                                         &adj_sig0, &adj_sig1);
                    rexp = axexp + 1;
                }
                if (adj_sub) {
                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                } else {
                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                }
            }

            env->fp_status.float_rounding_mode = save_mode;
            env->fp_status.floatx80_rounding_precision = save_prec;
        }
        /* This result is inexact.  */
        rsig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
                                            rsig0, rsig1, &env->fp_status);
    }

    fpop(env);
    merge_exception_flags(env, old_flags);
}

/*
 * Emulate the x87 FXTRACT instruction: split ST0 into its unbiased
 * exponent and its significand, then push, leaving the exponent in
 * ST1 and the significand (with exponent rebiased to 0) in ST0.
 * Zero, invalid-encoding, NaN and infinity operands each get their
 * own handling below.
 */
void helper_fxtract(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    CPU_LDoubleU temp;

    temp.d = ST0;

    if (floatx80_is_zero(ST0)) {
        /* Easy way to generate -inf and raising division by 0 exception */
        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
                           &env->fp_status);
        fpush(env);
        ST0 = temp.d;
    } else if (floatx80_invalid_encoding(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_infinity(ST0, &env->fp_status)) {
        fpush(env);
        ST0 = ST1;
        ST1 = floatx80_default_inf(0, &env->fp_status);
    } else {
        int expdif;

        if (EXPD(temp) == 0) {
            /* Pseudo-denormal: normalize to get the true exponent.  */
            int shift = clz64(temp.l.lower);
            temp.l.lower <<= shift;
            expdif = 1 - EXPBIAS - shift;
            float_raise(float_flag_input_denormal_flushed, &env->fp_status);
        } else {
            expdif = EXPD(temp) - EXPBIAS;
        }
        /* DP exponent bias */
        ST0 = int32_to_floatx80(expdif, &env->fp_status);
        fpush(env);
        BIASEXPONENT(temp);
        ST0 = temp.d;
    }
    merge_exception_flags(env, old_flags);
}

/*
 * Common implementation of the x87 FPREM (mod == true, truncating
 * quotient) and FPREM1 (mod == false, round-to-nearest quotient)
 * instructions: ST0 = remainder of ST0 / ST1.  When the exponent
 * difference is below 64 the operation completes and the low three
 * quotient bits are reported in C0/C3/C1 with C2 clear; otherwise only
 * a partial remainder is computed and C2 is set so the guest loops.
 */
static void helper_fprem_common(CPUX86State *env, bool mod)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t quotient;
    CPU_LDoubleU temp0, temp1;
    int exp0, exp1, expdiff;

    temp0.d = ST0;
    temp1.d = ST1;
    exp0 = EXPD(temp0);
    exp1 = EXPD(temp1);

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
        exp0 == 0x7fff || exp1 == 0x7fff ||
        floatx80_invalid_encoding(ST0, &env->fp_status) ||
        floatx80_invalid_encoding(ST1, &env->fp_status)) {
        /* Special operands: let floatx80_modrem handle exceptions.  */
        ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
    } else {
        /* Effective exponents, accounting for pseudo-denormals.  */
        if (exp0 == 0) {
            exp0 = 1 - clz64(temp0.l.lower);
        }
        if (exp1 == 0) {
            exp1 = 1 - clz64(temp1.l.lower);
        }
        expdiff = exp0 - exp1;
        if (expdiff < 64) {
            ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
            env->fpus |= (quotient & 0x4) << (8 - 2);  /* (C0) <-- q2 */
            env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
            env->fpus |= (quotient & 0x1) << (9 - 0);  /* (C1) <-- q0 */
        } else {
            /*
             * Partial remainder.  This choice of how many bits to
             * process at once is specified in AMD instruction set
             * manuals, and empirically is followed by Intel
             * processors as well; it ensures that the final remainder
             * operation in a loop does produce the correct low three
             * bits of the quotient.  AMD manuals specify that the
             * flags other than C2 are cleared, and empirically Intel
             * processors clear them as well.
             */
            int n = 32 + (expdiff % 32);
            temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
            ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
            env->fpus |= 0x400; /* C2 <-- 1 */
        }
    }
    merge_exception_flags(env, old_flags);
}

/* x87 FPREM1: IEEE remainder (round-to-nearest quotient).  */
void helper_fprem1(CPUX86State *env)
{
    helper_fprem_common(env, false);
}

/* x87 FPREM: legacy remainder (truncating quotient).  */
void helper_fprem(CPUX86State *env)
{
    helper_fprem_common(env, true);
}

/* 128-bit significand of log2(e).  */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)

/*
 * Compute an approximation of log2(1+arg), where 1+arg is in the
 * interval [sqrt(2)/2, sqrt(2)].
It is assumed that when this
 * function is called, rounding precision is set to 80 and the
 * round-to-nearest mode is in effect. arg must not be exactly zero,
 * and must not be so close to zero that underflow might occur.
 *
 * The result is returned decomposed as an exponent (*exp) and the top
 * 128 bits of the significand (*sig0 high, *sig1 low), ready to be
 * rounded and packed by the caller.
 */
static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
                                uint64_t *sig0, uint64_t *sig1)
{
    uint64_t arg0_sig = extractFloatx80Frac(arg);
    int32_t arg0_exp = extractFloatx80Exp(arg);
    bool arg0_sign = extractFloatx80Sign(arg);
    bool asign;
    int32_t dexp, texp, aexp;
    uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
    uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
    uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
    floatx80 t2, accum;

    /*
     * Compute an approximation of arg/(2+arg), with extra precision,
     * as the argument to a polynomial approximation.  The extra
     * precision is only needed for the first term of the
     * approximation, with subsequent terms being significantly
     * smaller; the approximation only uses odd exponents, and the
     * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
     */
    if (arg0_sign) {
        /* arg < 0: 2+arg has exponent 0x3fff; form (2+arg) by subtraction. */
        dexp = 0x3fff;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
    } else {
        /* arg >= 0: 2+arg has exponent 0x4000; OR in the integer bit. */
        dexp = 0x4000;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        dsig0 |= 0x8000000000000000ULL;
    }
    texp = arg0_exp - dexp + 0x3ffe;
    rsig0 = arg0_sig;
    rsig1 = 0;
    rsig2 = 0;
    if (dsig0 <= rsig0) {
        /* Keep the quotient significand below 1. */
        shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
        ++texp;
    }
    /* Long division: top 64 quotient bits, then correct the estimate. */
    tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
    mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
    sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
           &rsig0, &rsig1, &rsig2);
    while ((int64_t) rsig0 < 0) {
        --tsig0;
        add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
               &rsig0, &rsig1, &rsig2);
    }
    tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
    /*
     * No need to correct any estimation error in tsig1; even with
     * such error, it is accurate enough.  Now compute the square of
     * that approximation.
     */
    mul128To256(tsig0, tsig1, tsig0, tsig1,
                &t2sig0, &t2sig1, &t2sig2, &t2sig3);
    t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                       texp + texp - 0x3ffe,
                                       t2sig0, t2sig1, &env->fp_status);

    /* Compute the lower parts of the polynomial expansion (Horner form). */
    accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);

    /*
     * The full polynomial expansion is fyl2x_coeff_0 + accum (where
     * accum has much lower magnitude, and so, in particular, carry
     * out of the addition is not possible), multiplied by t.  (This
     * expansion is only accurate to about 70 bits, not 128 bits.)
     */
    aexp = extractFloatx80Exp(fyl2x_coeff_0);
    asign = extractFloatx80Sign(fyl2x_coeff_0);
    shift128RightJamming(extractFloatx80Frac(accum), 0,
                         aexp - extractFloatx80Exp(accum),
                         &asig0, &asig1);
    bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
    bsig1 = 0;
    if (asign == extractFloatx80Sign(accum)) {
        add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    } else {
        sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    }
    /* Multiply by t to compute the required result. */
    mul128To256(asig0, asig1, tsig0, tsig1,
                &asig0, &asig1, &asig2, &asig3);
    aexp += texp - 0x3ffe;
    *exp = aexp;
    *sig0 = asig0;
    *sig1 = asig1;
}

/*
 * FYL2XP1: ST1 = ST1 * log2(ST0 + 1), then pop.  Special cases follow
 * the x87 rules visible below: signaling NaNs and invalid encodings
 * raise invalid; quiet NaNs propagate; out-of-range ST0 yields the
 * default NaN.
 */
void helper_fyl2xp1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0, &env->fp_status) ||
               floatx80_invalid_encoding(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_exp > 0x3ffd ||
               (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
                                                  0x95f619980c4336f7ULL :
                                                  0xd413cccfe7799211ULL))) {
        /*
         * Out of range for the instruction (ST0 must have absolute
         * value less than 1 - sqrt(2)/2 = 0.292..., according to
         * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
         * to sqrt(2) - 1, which we allow here), treat as invalid.
         */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
               arg1_exp == 0x7fff) {
        /*
         * One argument is zero, or multiplying by infinity; correct
         * result is exact and can be obtained by multiplying the
         * arguments.
         */
        ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
    } else if (arg0_exp < 0x3fb0) {
        /*
         * Multiplying both arguments and an extra-precision version
         * of log2(e) is sufficiently precise.
         */
        uint64_t sig0, sig1, sig2;
        int32_t exp;
        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
                        &sig0, &sig1, &sig2);
        exp = arg0_exp + 1;
        mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
        exp += arg1_exp - 0x3ffe;
        /* This result is inexact.  */
        sig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, exp,
                                            sig0, sig1, &env->fp_status);
    } else {
        int32_t aexp;
        uint64_t asig0, asig1, asig2;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        /* helper_fyl2x_common requires these rounding settings. */
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
        /*
         * Multiply by the second argument to compute the required
         * result.
         */
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
        aexp += arg1_exp - 0x3ffe;
        /* This result is inexact.  */
        asig1 |= 1;
        /* Restore the caller's rounding mode before the final rounding. */
        env->fp_status.float_rounding_mode = save_mode;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, aexp,
                                            asig0, asig1, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}

/*
 * FYL2X: ST1 = ST1 * log2(ST0), then pop.  The long chain of special
 * cases (NaNs, invalid encodings, infinities, zeros, ST0 == 1) is
 * resolved before falling through to the polynomial approximation.
 */
void helper_fyl2x(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0, &env->fp_status) ||
               floatx80_invalid_encoding(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_sign && !floatx80_is_zero(ST0)) {
        /* log2 of a negative number is invalid. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_infinity(ST1, &env->fp_status)) {
        FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
                                             &env->fp_status);
        switch (cmp) {
        case float_relation_less:
            /* log2(ST0) < 0: flip the sign of the infinity. */
            ST1 = floatx80_chs(ST1);
            break;
        case float_relation_greater:
            /* Result is infinity of the same sign as ST1. */
            break;
        default:
            /* ST0 == 1: inf * 0 is invalid. */
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
            break;
        }
    } else if (floatx80_is_infinity(ST0, &env->fp_status)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else if (arg1_sign) {
            ST1 = floatx80_chs(ST0);
        } else {
            ST1 = ST0;
        }
    } else if (floatx80_is_zero(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else {
            /* Result is infinity with opposite sign to ST1. */
            float_raise(float_flag_divbyzero, &env->fp_status);
            ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
                                0x8000000000000000ULL);
        }
    } else if (floatx80_is_zero(ST1)) {
        if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
            ST1 = floatx80_chs(ST1);
        }
        /* Otherwise, ST1 is already the correct result.  */
    } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
        /* log2(1) == 0; the zero takes ST1's sign. */
        if (arg1_sign) {
            ST1 = floatx80_chs(floatx80_zero);
        } else {
            ST1 = floatx80_zero;
        }
    } else {
        int32_t int_exp;
        floatx80 arg0_m1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        /* helper_fyl2x_common requires these rounding settings. */
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        /*
         * Split log2(ST0) into an integer part int_exp and the log of a
         * value in [sqrt(2)/2, sqrt(2)]; 0xb504f333... is sqrt(2)'s
         * significand.
         */
        int_exp = arg0_exp - 0x3fff;
        if (arg0_sig > 0xb504f333f9de6484ULL) {
            ++int_exp;
        }
        arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
                                               &env->fp_status),
                               floatx80_one, &env->fp_status);
        if (floatx80_is_zero(arg0_m1)) {
            /* Exact power of 2; multiply by ST1. */
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
                               ST1, &env->fp_status);
        } else {
            bool asign = extractFloatx80Sign(arg0_m1);
            int32_t aexp;
            uint64_t asig0, asig1, asig2;
            helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
            if (int_exp != 0) {
                /* Add the integer part of the logarithm back in. */
                bool isign = (int_exp < 0);
                int32_t iexp;
                uint64_t isig;
                int shift;
                int_exp = isign ? -int_exp : int_exp;
                shift = clz32(int_exp) + 32;
                isig = int_exp;
                isig <<= shift;
                iexp = 0x403e - shift;
                shift128RightJamming(asig0, asig1, iexp - aexp,
                                     &asig0, &asig1);
                if (asign == isign) {
                    add128(isig, 0, asig0, asig1, &asig0, &asig1);
                } else {
                    sub128(isig, 0, asig0, asig1, &asig0, &asig1);
                }
                aexp = iexp;
                asign = isign;
            }
            /*
             * Multiply by the second argument to compute the required
             * result.
2279 */ 2280 if (arg1_exp == 0) { 2281 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2282 } 2283 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2284 aexp += arg1_exp - 0x3ffe; 2285 /* This result is inexact. */ 2286 asig1 |= 1; 2287 env->fp_status.float_rounding_mode = save_mode; 2288 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2289 asign ^ arg1_sign, aexp, 2290 asig0, asig1, &env->fp_status); 2291 } 2292 2293 env->fp_status.floatx80_rounding_precision = save_prec; 2294 } 2295 fpop(env); 2296 merge_exception_flags(env, old_flags); 2297 } 2298 2299 void helper_fsqrt(CPUX86State *env) 2300 { 2301 uint8_t old_flags = save_exception_flags(env); 2302 if (floatx80_is_neg(ST0)) { 2303 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2304 env->fpus |= 0x400; 2305 } 2306 ST0 = floatx80_sqrt(ST0, &env->fp_status); 2307 merge_exception_flags(env, old_flags); 2308 } 2309 2310 void helper_fsincos(CPUX86State *env) 2311 { 2312 double fptemp = floatx80_to_double(env, ST0); 2313 2314 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2315 env->fpus |= 0x400; 2316 } else { 2317 ST0 = double_to_floatx80(env, sin(fptemp)); 2318 fpush(env); 2319 ST0 = double_to_floatx80(env, cos(fptemp)); 2320 env->fpus &= ~0x400; /* C2 <-- 0 */ 2321 /* the above code is for |arg| < 2**63 only */ 2322 } 2323 } 2324 2325 void helper_frndint(CPUX86State *env) 2326 { 2327 uint8_t old_flags = save_exception_flags(env); 2328 ST0 = floatx80_round_to_int(ST0, &env->fp_status); 2329 merge_exception_flags(env, old_flags); 2330 } 2331 2332 void helper_fscale(CPUX86State *env) 2333 { 2334 uint8_t old_flags = save_exception_flags(env); 2335 if (floatx80_invalid_encoding(ST1, &env->fp_status) || 2336 floatx80_invalid_encoding(ST0, &env->fp_status)) { 2337 float_raise(float_flag_invalid, &env->fp_status); 2338 ST0 = floatx80_default_nan(&env->fp_status); 2339 } else if (floatx80_is_any_nan(ST1)) { 2340 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2341 
float_raise(float_flag_invalid, &env->fp_status); 2342 } 2343 ST0 = ST1; 2344 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2345 float_raise(float_flag_invalid, &env->fp_status); 2346 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 2347 } 2348 } else if (floatx80_is_infinity(ST1, &env->fp_status) && 2349 !floatx80_invalid_encoding(ST0, &env->fp_status) && 2350 !floatx80_is_any_nan(ST0)) { 2351 if (floatx80_is_neg(ST1)) { 2352 if (floatx80_is_infinity(ST0, &env->fp_status)) { 2353 float_raise(float_flag_invalid, &env->fp_status); 2354 ST0 = floatx80_default_nan(&env->fp_status); 2355 } else { 2356 ST0 = (floatx80_is_neg(ST0) ? 2357 floatx80_chs(floatx80_zero) : 2358 floatx80_zero); 2359 } 2360 } else { 2361 if (floatx80_is_zero(ST0)) { 2362 float_raise(float_flag_invalid, &env->fp_status); 2363 ST0 = floatx80_default_nan(&env->fp_status); 2364 } else { 2365 ST0 = floatx80_default_inf(floatx80_is_neg(ST0), 2366 &env->fp_status); 2367 } 2368 } 2369 } else { 2370 int n; 2371 FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision; 2372 uint8_t save_flags = get_float_exception_flags(&env->fp_status); 2373 set_float_exception_flags(0, &env->fp_status); 2374 n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); 2375 set_float_exception_flags(save_flags, &env->fp_status); 2376 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2377 ST0 = floatx80_scalbn(ST0, n, &env->fp_status); 2378 env->fp_status.floatx80_rounding_precision = save; 2379 } 2380 merge_exception_flags(env, old_flags); 2381 } 2382 2383 void helper_fsin(CPUX86State *env) 2384 { 2385 double fptemp = floatx80_to_double(env, ST0); 2386 2387 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2388 env->fpus |= 0x400; 2389 } else { 2390 ST0 = double_to_floatx80(env, sin(fptemp)); 2391 env->fpus &= ~0x400; /* C2 <-- 0 */ 2392 /* the above code is for |arg| < 2**53 only */ 2393 } 2394 } 2395 2396 void helper_fcos(CPUX86State *env) 2397 { 2398 double fptemp = 
floatx80_to_double(env, ST0); 2399 2400 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2401 env->fpus |= 0x400; 2402 } else { 2403 ST0 = double_to_floatx80(env, cos(fptemp)); 2404 env->fpus &= ~0x400; /* C2 <-- 0 */ 2405 /* the above code is for |arg| < 2**63 only */ 2406 } 2407 } 2408 2409 void helper_fxam_ST0(CPUX86State *env) 2410 { 2411 CPU_LDoubleU temp; 2412 int expdif; 2413 2414 temp.d = ST0; 2415 2416 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2417 if (SIGND(temp)) { 2418 env->fpus |= 0x200; /* C1 <-- 1 */ 2419 } 2420 2421 if (env->fptags[env->fpstt]) { 2422 env->fpus |= 0x4100; /* Empty */ 2423 return; 2424 } 2425 2426 expdif = EXPD(temp); 2427 if (expdif == MAXEXPD) { 2428 if (MANTD(temp) == 0x8000000000000000ULL) { 2429 env->fpus |= 0x500; /* Infinity */ 2430 } else if (MANTD(temp) & 0x8000000000000000ULL) { 2431 env->fpus |= 0x100; /* NaN */ 2432 } 2433 } else if (expdif == 0) { 2434 if (MANTD(temp) == 0) { 2435 env->fpus |= 0x4000; /* Zero */ 2436 } else { 2437 env->fpus |= 0x4400; /* Denormal */ 2438 } 2439 } else if (MANTD(temp) & 0x8000000000000000ULL) { 2440 env->fpus |= 0x400; 2441 } 2442 } 2443 2444 static void do_fstenv(X86Access *ac, target_ulong ptr, int data32) 2445 { 2446 CPUX86State *env = ac->env; 2447 int fpus, fptag, exp, i; 2448 uint64_t mant; 2449 CPU_LDoubleU tmp; 2450 2451 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2452 fptag = 0; 2453 for (i = 7; i >= 0; i--) { 2454 fptag <<= 2; 2455 if (env->fptags[i]) { 2456 fptag |= 3; 2457 } else { 2458 tmp.d = env->fpregs[i].d; 2459 exp = EXPD(tmp); 2460 mant = MANTD(tmp); 2461 if (exp == 0 && mant == 0) { 2462 /* zero */ 2463 fptag |= 1; 2464 } else if (exp == 0 || exp == MAXEXPD 2465 || (mant & (1LL << 63)) == 0) { 2466 /* NaNs, infinity, denormal */ 2467 fptag |= 2; 2468 } 2469 } 2470 } 2471 if (data32) { 2472 /* 32 bit */ 2473 access_stl(ac, ptr, env->fpuc); 2474 access_stl(ac, ptr + 4, fpus); 2475 access_stl(ac, ptr + 8, fptag); 2476 access_stl(ac, ptr + 12, 
env->fpip); /* fpip */ 2477 access_stl(ac, ptr + 16, env->fpcs); /* fpcs */ 2478 access_stl(ac, ptr + 20, env->fpdp); /* fpoo */ 2479 access_stl(ac, ptr + 24, env->fpds); /* fpos */ 2480 } else { 2481 /* 16 bit */ 2482 access_stw(ac, ptr, env->fpuc); 2483 access_stw(ac, ptr + 2, fpus); 2484 access_stw(ac, ptr + 4, fptag); 2485 access_stw(ac, ptr + 6, env->fpip); 2486 access_stw(ac, ptr + 8, env->fpcs); 2487 access_stw(ac, ptr + 10, env->fpdp); 2488 access_stw(ac, ptr + 12, env->fpds); 2489 } 2490 } 2491 2492 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) 2493 { 2494 X86Access ac; 2495 2496 access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC()); 2497 do_fstenv(&ac, ptr, data32); 2498 } 2499 2500 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) 2501 { 2502 env->fpstt = (fpus >> 11) & 7; 2503 env->fpus = fpus & ~0x3800 & ~FPUS_B; 2504 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0; 2505 #if !defined(CONFIG_USER_ONLY) 2506 if (!(env->fpus & FPUS_SE)) { 2507 /* 2508 * Here the processor deasserts FERR#; in response, the chipset deasserts 2509 * IGNNE#. 
2510 */ 2511 cpu_clear_ignne(); 2512 } 2513 #endif 2514 } 2515 2516 static void do_fldenv(X86Access *ac, target_ulong ptr, int data32) 2517 { 2518 int i, fpus, fptag; 2519 CPUX86State *env = ac->env; 2520 2521 cpu_set_fpuc(env, access_ldw(ac, ptr)); 2522 fpus = access_ldw(ac, ptr + (2 << data32)); 2523 fptag = access_ldw(ac, ptr + (4 << data32)); 2524 2525 cpu_set_fpus(env, fpus); 2526 for (i = 0; i < 8; i++) { 2527 env->fptags[i] = ((fptag & 3) == 3); 2528 fptag >>= 2; 2529 } 2530 } 2531 2532 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) 2533 { 2534 X86Access ac; 2535 2536 access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC()); 2537 do_fldenv(&ac, ptr, data32); 2538 } 2539 2540 static void do_fsave(X86Access *ac, target_ulong ptr, int data32) 2541 { 2542 CPUX86State *env = ac->env; 2543 2544 do_fstenv(ac, ptr, data32); 2545 ptr += 14 << data32; 2546 2547 for (int i = 0; i < 8; i++) { 2548 floatx80 tmp = ST(i); 2549 do_fstt(ac, ptr, tmp); 2550 ptr += 10; 2551 } 2552 2553 do_fninit(env); 2554 } 2555 2556 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) 2557 { 2558 int size = (14 << data32) + 80; 2559 X86Access ac; 2560 2561 access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, GETPC()); 2562 do_fsave(&ac, ptr, data32); 2563 } 2564 2565 static void do_frstor(X86Access *ac, target_ulong ptr, int data32) 2566 { 2567 CPUX86State *env = ac->env; 2568 2569 do_fldenv(ac, ptr, data32); 2570 ptr += 14 << data32; 2571 2572 for (int i = 0; i < 8; i++) { 2573 floatx80 tmp = do_fldt(ac, ptr); 2574 ST(i) = tmp; 2575 ptr += 10; 2576 } 2577 } 2578 2579 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) 2580 { 2581 int size = (14 << data32) + 80; 2582 X86Access ac; 2583 2584 access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, GETPC()); 2585 do_frstor(&ac, ptr, data32); 2586 } 2587 2588 #define XO(X) offsetof(X86XSaveArea, X) 2589 2590 static void do_xsave_fpu(X86Access *ac, target_ulong ptr) 2591 { 2592 CPUX86State 
*env = ac->env; 2593 int fpus, fptag, i; 2594 target_ulong addr; 2595 2596 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2597 fptag = 0; 2598 for (i = 0; i < 8; i++) { 2599 fptag |= (env->fptags[i] << i); 2600 } 2601 2602 access_stw(ac, ptr + XO(legacy.fcw), env->fpuc); 2603 access_stw(ac, ptr + XO(legacy.fsw), fpus); 2604 access_stw(ac, ptr + XO(legacy.ftw), fptag ^ 0xff); 2605 2606 /* In 32-bit mode this is eip, sel, dp, sel. 2607 In 64-bit mode this is rip, rdp. 2608 But in either case we don't write actual data, just zeros. */ 2609 access_stq(ac, ptr + XO(legacy.fpip), 0); /* eip+sel; rip */ 2610 access_stq(ac, ptr + XO(legacy.fpdp), 0); /* edp+sel; rdp */ 2611 2612 addr = ptr + XO(legacy.fpregs); 2613 2614 for (i = 0; i < 8; i++) { 2615 floatx80 tmp = ST(i); 2616 do_fstt(ac, addr, tmp); 2617 addr += 16; 2618 } 2619 } 2620 2621 static void do_xsave_mxcsr(X86Access *ac, target_ulong ptr) 2622 { 2623 CPUX86State *env = ac->env; 2624 2625 update_mxcsr_from_sse_status(env); 2626 access_stl(ac, ptr + XO(legacy.mxcsr), env->mxcsr); 2627 access_stl(ac, ptr + XO(legacy.mxcsr_mask), 0x0000ffff); 2628 } 2629 2630 static void do_xsave_sse(X86Access *ac, target_ulong ptr) 2631 { 2632 CPUX86State *env = ac->env; 2633 int i, nb_xmm_regs; 2634 target_ulong addr; 2635 2636 if (env->hflags & HF_CS64_MASK) { 2637 nb_xmm_regs = 16; 2638 } else { 2639 nb_xmm_regs = 8; 2640 } 2641 2642 addr = ptr + XO(legacy.xmm_regs); 2643 for (i = 0; i < nb_xmm_regs; i++) { 2644 access_stq(ac, addr, env->xmm_regs[i].ZMM_Q(0)); 2645 access_stq(ac, addr + 8, env->xmm_regs[i].ZMM_Q(1)); 2646 addr += 16; 2647 } 2648 } 2649 2650 static void do_xsave_ymmh(X86Access *ac, target_ulong ptr) 2651 { 2652 CPUX86State *env = ac->env; 2653 int i, nb_xmm_regs; 2654 2655 if (env->hflags & HF_CS64_MASK) { 2656 nb_xmm_regs = 16; 2657 } else { 2658 nb_xmm_regs = 8; 2659 } 2660 2661 for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { 2662 access_stq(ac, ptr, env->xmm_regs[i].ZMM_Q(2)); 2663 access_stq(ac, ptr 
+ 8, env->xmm_regs[i].ZMM_Q(3)); 2664 } 2665 } 2666 2667 static void do_xsave_bndregs(X86Access *ac, target_ulong ptr) 2668 { 2669 CPUX86State *env = ac->env; 2670 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2671 int i; 2672 2673 for (i = 0; i < 4; i++, addr += 16) { 2674 access_stq(ac, addr, env->bnd_regs[i].lb); 2675 access_stq(ac, addr + 8, env->bnd_regs[i].ub); 2676 } 2677 } 2678 2679 static void do_xsave_bndcsr(X86Access *ac, target_ulong ptr) 2680 { 2681 CPUX86State *env = ac->env; 2682 2683 access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), 2684 env->bndcs_regs.cfgu); 2685 access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), 2686 env->bndcs_regs.sts); 2687 } 2688 2689 static void do_xsave_pkru(X86Access *ac, target_ulong ptr) 2690 { 2691 access_stq(ac, ptr, ac->env->pkru); 2692 } 2693 2694 static void do_fxsave(X86Access *ac, target_ulong ptr) 2695 { 2696 CPUX86State *env = ac->env; 2697 2698 do_xsave_fpu(ac, ptr); 2699 if (env->cr[4] & CR4_OSFXSR_MASK) { 2700 do_xsave_mxcsr(ac, ptr); 2701 /* Fast FXSAVE leaves out the XMM registers */ 2702 if (!(env->efer & MSR_EFER_FFXSR) 2703 || (env->hflags & HF_CPL_MASK) 2704 || !(env->hflags & HF_LMA_MASK)) { 2705 do_xsave_sse(ac, ptr); 2706 } 2707 } 2708 } 2709 2710 void helper_fxsave(CPUX86State *env, target_ulong ptr) 2711 { 2712 uintptr_t ra = GETPC(); 2713 X86Access ac; 2714 2715 /* The operand must be 16 byte aligned */ 2716 if (ptr & 0xf) { 2717 raise_exception_ra(env, EXCP0D_GPF, ra); 2718 } 2719 2720 access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), 2721 MMU_DATA_STORE, ra); 2722 do_fxsave(&ac, ptr); 2723 } 2724 2725 static uint64_t get_xinuse(CPUX86State *env) 2726 { 2727 uint64_t inuse = -1; 2728 2729 /* For the most part, we don't track XINUSE. We could calculate it 2730 here for all components, but it's probably less work to simply 2731 indicate in use. That said, the state of BNDREGS is important 2732 enough to track in HFLAGS, so we might as well use that here. 
     */
    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
        inuse &= ~XSTATE_BNDREGS_MASK;
    }
    return inuse;
}

/*
 * Write the requested XSAVE components to an already-prepared access.
 * "rfbm" is the requested-feature bitmap, "inuse" the components with
 * live state, and "opt" the components actually written (a subset of
 * rfbm; equal to it for plain XSAVE).
 */
static void do_xsave_access(X86Access *ac, target_ulong ptr, uint64_t rfbm,
                            uint64_t inuse, uint64_t opt)
{
    uint64_t old_bv, new_bv;

    if (opt & XSTATE_FP_MASK) {
        do_xsave_fpu(ac, ptr);
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
        do_xsave_mxcsr(ac, ptr);
    }
    if (opt & XSTATE_SSE_MASK) {
        do_xsave_sse(ac, ptr);
    }
    if (opt & XSTATE_YMM_MASK) {
        do_xsave_ymmh(ac, ptr + XO(avx_state));
    }
    if (opt & XSTATE_BNDREGS_MASK) {
        do_xsave_bndregs(ac, ptr + XO(bndreg_state));
    }
    if (opt & XSTATE_BNDCSR_MASK) {
        do_xsave_bndcsr(ac, ptr + XO(bndcsr_state));
    }
    if (opt & XSTATE_PKRU_MASK) {
        do_xsave_pkru(ac, ptr + XO(pkru_state));
    }

    /* Update the XSTATE_BV field.  */
    old_bv = access_ldq(ac, ptr + XO(header.xstate_bv));
    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
    access_stq(ac, ptr + XO(header.xstate_bv), new_bv);
}

/* Common XSAVE/XRSTOR precondition checks (#UD / #GP). */
static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }
}

static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
                     uint64_t inuse, uint64_t opt, uintptr_t ra)
{
    X86Access ac;
    unsigned size;

    do_xsave_chk(env, ptr, ra);

    /* Never save anything not enabled by XCR0.  */
    rfbm &= env->xcr0;
    opt &= rfbm;
    size = xsave_area_size(opt, false);

    access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra);
    do_xsave_access(&ac, ptr, rfbm, inuse, opt);
}

void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    do_xsave(env, ptr, rfbm, get_xinuse(env), rfbm, GETPC());
}

/* XSAVEOPT only writes components that are both requested and in use. */
void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uint64_t inuse = get_xinuse(env);
    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
}

/* Restore the legacy FPU portion of an FXSAVE/XSAVE area. */
static void do_xrstor_fpu(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, fpuc, fpus, fptag;
    target_ulong addr;

    fpuc = access_ldw(ac, ptr + XO(legacy.fcw));
    fpus = access_ldw(ac, ptr + XO(legacy.fsw));
    fptag = access_ldw(ac, ptr + XO(legacy.ftw));
    cpu_set_fpuc(env, fpuc);
    cpu_set_fpus(env, fpus);

    /* FXSAVE's tag word uses 1 == valid; internally 1 == empty. */
    fptag ^= 0xff;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag >> i) & 1);
    }

    addr = ptr + XO(legacy.fpregs);

    for (i = 0; i < 8; i++) {
        floatx80 tmp = do_fldt(ac, addr);
        ST(i) = tmp;
        addr += 16;
    }
}

/* Restore MXCSR from the legacy XSAVE area. */
static void do_xrstor_mxcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    cpu_set_mxcsr(env, access_ldl(ac, ptr + XO(legacy.mxcsr)));
}

/* Restore the low 128 bits of the XMM registers. */
static void do_xrstor_sse(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + XO(legacy.xmm_regs);
    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(0) = access_ldq(ac, addr);
        env->xmm_regs[i].ZMM_Q(1) = access_ldq(ac, addr + 8);
        addr += 16;
    }
}

/* Reset the low 128 bits of the XMM registers to the init state. */
static void do_clear_sse(CPUX86State *env)
{
    int i, nb_xmm_regs;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(0) = 0;
        env->xmm_regs[i].ZMM_Q(1) = 0;
    }
}

/* Restore the upper halves of the YMM registers (AVX component). */
static void do_xrstor_ymmh(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, nb_xmm_regs;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
        env->xmm_regs[i].ZMM_Q(2) = access_ldq(ac, ptr);
        env->xmm_regs[i].ZMM_Q(3) = access_ldq(ac, ptr + 8);
    }
}

/* Reset the upper halves of the YMM registers to the init state. */
static void do_clear_ymmh(CPUX86State *env)
{
    int i, nb_xmm_regs;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(2) = 0;
        env->xmm_regs[i].ZMM_Q(3) = 0;
    }
}

/* Restore the four MPX bound registers. */
static void do_xrstor_bndregs(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        env->bnd_regs[i].lb = access_ldq(ac, addr);
        env->bnd_regs[i].ub = access_ldq(ac, addr + 8);
    }
}

/* Restore the MPX configuration and status registers. */
static void do_xrstor_bndcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    /* FIXME: Extend highest implemented bit of linear address.
     */
    env->bndcs_regs.cfgu
        = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu));
    env->bndcs_regs.sts
        = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts));
}

/* Restore the protection-key register. */
static void do_xrstor_pkru(X86Access *ac, target_ulong ptr)
{
    ac->env->pkru = access_ldq(ac, ptr);
}

static void do_fxrstor(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    do_xrstor_fpu(ac, ptr);
    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xrstor_mxcsr(ac, ptr);
        /* Fast FXRSTOR leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xrstor_sse(ac, ptr);
        }
    }
}

/* FXRSTOR: restore x87/MXCSR/XMM state; operand must be 16-byte aligned. */
void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();
    X86Access ac;

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
                   MMU_DATA_LOAD, ra);
    do_fxrstor(&ac, ptr);
}

/*
 * Validate the XSAVE header for XRSTOR; returns false (caller raises
 * #GP) for compact-form images or reserved bits.  *pxsbv receives the
 * XSTATE_BV field.
 */
static bool valid_xrstor_header(X86Access *ac, uint64_t *pxsbv,
                                target_ulong ptr)
{
    uint64_t xstate_bv, xcomp_bv, reserve0;

    xstate_bv = access_ldq(ac, ptr + XO(header.xstate_bv));
    xcomp_bv = access_ldq(ac, ptr + XO(header.xcomp_bv));
    reserve0 = access_ldq(ac, ptr + XO(header.reserve0));
    *pxsbv = xstate_bv;

    /*
     * XCOMP_BV bit 63 indicates compact form, which we do not support,
     * and thus must raise #GP. That leaves us in standard form.
     * In standard form, bytes 23:8 must be zero -- which is both
     * XCOMP_BV and the following 64-bit field.
     */
    if (xcomp_bv || reserve0) {
        return false;
    }

    /* The XSTATE_BV field must not set bits not present in XCR0.
*/ 2995 return (xstate_bv & ~ac->env->xcr0) == 0; 2996 } 2997 2998 static void do_xrstor(X86Access *ac, target_ulong ptr, 2999 uint64_t rfbm, uint64_t xstate_bv) 3000 { 3001 CPUX86State *env = ac->env; 3002 3003 if (rfbm & XSTATE_FP_MASK) { 3004 if (xstate_bv & XSTATE_FP_MASK) { 3005 do_xrstor_fpu(ac, ptr); 3006 } else { 3007 do_fninit(env); 3008 memset(env->fpregs, 0, sizeof(env->fpregs)); 3009 } 3010 } 3011 if (rfbm & XSTATE_SSE_MASK) { 3012 /* Note that the standard form of XRSTOR loads MXCSR from memory 3013 whether or not the XSTATE_BV bit is set. */ 3014 do_xrstor_mxcsr(ac, ptr); 3015 if (xstate_bv & XSTATE_SSE_MASK) { 3016 do_xrstor_sse(ac, ptr); 3017 } else { 3018 do_clear_sse(env); 3019 } 3020 } 3021 if (rfbm & XSTATE_YMM_MASK) { 3022 if (xstate_bv & XSTATE_YMM_MASK) { 3023 do_xrstor_ymmh(ac, ptr + XO(avx_state)); 3024 } else { 3025 do_clear_ymmh(env); 3026 } 3027 } 3028 if (rfbm & XSTATE_BNDREGS_MASK) { 3029 if (xstate_bv & XSTATE_BNDREGS_MASK) { 3030 do_xrstor_bndregs(ac, ptr + XO(bndreg_state)); 3031 env->hflags |= HF_MPX_IU_MASK; 3032 } else { 3033 memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); 3034 env->hflags &= ~HF_MPX_IU_MASK; 3035 } 3036 } 3037 if (rfbm & XSTATE_BNDCSR_MASK) { 3038 if (xstate_bv & XSTATE_BNDCSR_MASK) { 3039 do_xrstor_bndcsr(ac, ptr + XO(bndcsr_state)); 3040 } else { 3041 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); 3042 } 3043 cpu_sync_bndcs_hflags(env); 3044 } 3045 if (rfbm & XSTATE_PKRU_MASK) { 3046 uint64_t old_pkru = env->pkru; 3047 if (xstate_bv & XSTATE_PKRU_MASK) { 3048 do_xrstor_pkru(ac, ptr + XO(pkru_state)); 3049 } else { 3050 env->pkru = 0; 3051 } 3052 if (env->pkru != old_pkru) { 3053 CPUState *cs = env_cpu(env); 3054 tlb_flush(cs); 3055 } 3056 } 3057 } 3058 3059 #undef XO 3060 3061 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 3062 { 3063 uintptr_t ra = GETPC(); 3064 X86Access ac; 3065 uint64_t xstate_bv; 3066 unsigned size, size_ext; 3067 3068 do_xsave_chk(env, ptr, ra); 3069 3070 
/* Begin with just the minimum size to validate the header. */ 3071 size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader); 3072 access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra); 3073 if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) { 3074 raise_exception_ra(env, EXCP0D_GPF, ra); 3075 } 3076 3077 rfbm &= env->xcr0; 3078 size_ext = xsave_area_size(rfbm & xstate_bv, false); 3079 if (size < size_ext) { 3080 /* TODO: See if existing page probe has covered extra size. */ 3081 access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra); 3082 } 3083 3084 do_xrstor(&ac, ptr, rfbm, xstate_bv); 3085 } 3086 3087 #if defined(CONFIG_USER_ONLY) 3088 void cpu_x86_fsave(CPUX86State *env, void *host, size_t len) 3089 { 3090 X86Access ac = { 3091 .haddr1 = host, 3092 .size = 4 * 7 + 8 * 10, 3093 .env = env, 3094 }; 3095 3096 assert(ac.size <= len); 3097 do_fsave(&ac, 0, true); 3098 } 3099 3100 void cpu_x86_frstor(CPUX86State *env, void *host, size_t len) 3101 { 3102 X86Access ac = { 3103 .haddr1 = host, 3104 .size = 4 * 7 + 8 * 10, 3105 .env = env, 3106 }; 3107 3108 assert(ac.size <= len); 3109 do_frstor(&ac, 0, true); 3110 } 3111 3112 void cpu_x86_fxsave(CPUX86State *env, void *host, size_t len) 3113 { 3114 X86Access ac = { 3115 .haddr1 = host, 3116 .size = sizeof(X86LegacyXSaveArea), 3117 .env = env, 3118 }; 3119 3120 assert(ac.size <= len); 3121 do_fxsave(&ac, 0); 3122 } 3123 3124 void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len) 3125 { 3126 X86Access ac = { 3127 .haddr1 = host, 3128 .size = sizeof(X86LegacyXSaveArea), 3129 .env = env, 3130 }; 3131 3132 assert(ac.size <= len); 3133 do_fxrstor(&ac, 0); 3134 } 3135 3136 void cpu_x86_xsave(CPUX86State *env, void *host, size_t len, uint64_t rfbm) 3137 { 3138 X86Access ac = { 3139 .haddr1 = host, 3140 .env = env, 3141 }; 3142 3143 /* 3144 * Since this is only called from user-level signal handling, 3145 * we should have done the job correctly there. 
3146 */ 3147 assert((rfbm & ~env->xcr0) == 0); 3148 ac.size = xsave_area_size(rfbm, false); 3149 assert(ac.size <= len); 3150 do_xsave_access(&ac, 0, rfbm, get_xinuse(env), rfbm); 3151 } 3152 3153 bool cpu_x86_xrstor(CPUX86State *env, void *host, size_t len, uint64_t rfbm) 3154 { 3155 X86Access ac = { 3156 .haddr1 = host, 3157 .env = env, 3158 }; 3159 uint64_t xstate_bv; 3160 3161 /* 3162 * Since this is only called from user-level signal handling, 3163 * we should have done the job correctly there. 3164 */ 3165 assert((rfbm & ~env->xcr0) == 0); 3166 ac.size = xsave_area_size(rfbm, false); 3167 assert(ac.size <= len); 3168 3169 if (!valid_xrstor_header(&ac, &xstate_bv, 0)) { 3170 return false; 3171 } 3172 do_xrstor(&ac, 0, rfbm, xstate_bv); 3173 return true; 3174 } 3175 #endif 3176 3177 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) 3178 { 3179 /* The OS must have enabled XSAVE. */ 3180 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 3181 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 3182 } 3183 3184 switch (ecx) { 3185 case 0: 3186 return env->xcr0; 3187 case 1: 3188 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) { 3189 return env->xcr0 & get_xinuse(env); 3190 } 3191 break; 3192 } 3193 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 3194 } 3195 3196 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) 3197 { 3198 uint32_t dummy, ena_lo, ena_hi; 3199 uint64_t ena; 3200 3201 /* The OS must have enabled XSAVE. */ 3202 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 3203 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 3204 } 3205 3206 /* Only XCR0 is defined at present; the FPU may not be disabled. */ 3207 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) { 3208 goto do_gpf; 3209 } 3210 3211 /* SSE can be disabled, but only if AVX is disabled too. */ 3212 if ((mask & (XSTATE_SSE_MASK | XSTATE_YMM_MASK)) == XSTATE_YMM_MASK) { 3213 goto do_gpf; 3214 } 3215 3216 /* Disallow enabling unimplemented features. 
*/ 3217 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); 3218 ena = ((uint64_t)ena_hi << 32) | ena_lo; 3219 if (mask & ~ena) { 3220 goto do_gpf; 3221 } 3222 3223 /* Disallow enabling only half of MPX. */ 3224 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK))) 3225 & XSTATE_BNDCSR_MASK) { 3226 goto do_gpf; 3227 } 3228 3229 env->xcr0 = mask; 3230 cpu_sync_bndcs_hflags(env); 3231 cpu_sync_avx_hflag(env); 3232 return; 3233 3234 do_gpf: 3235 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 3236 } 3237 3238 /* MMX/SSE */ 3239 /* XXX: optimize by storing fptt and fptags in the static cpu state */ 3240 3241 #define SSE_DAZ 0x0040 3242 #define SSE_RC_SHIFT 13 3243 #define SSE_RC_MASK (3 << SSE_RC_SHIFT) 3244 #define SSE_FZ 0x8000 3245 3246 void update_mxcsr_status(CPUX86State *env) 3247 { 3248 uint32_t mxcsr = env->mxcsr; 3249 int rnd_type; 3250 3251 /* set rounding mode */ 3252 rnd_type = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT; 3253 set_x86_rounding_mode(rnd_type, &env->sse_status); 3254 3255 /* Set exception flags. */ 3256 set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | 3257 (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | 3258 (mxcsr & FPUS_OE ? float_flag_overflow : 0) | 3259 (mxcsr & FPUS_UE ? float_flag_underflow : 0) | 3260 (mxcsr & FPUS_PE ? float_flag_inexact : 0), 3261 &env->sse_status); 3262 3263 /* set denormals are zero */ 3264 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); 3265 3266 /* set flush to zero */ 3267 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status); 3268 } 3269 3270 void update_mxcsr_from_sse_status(CPUX86State *env) 3271 { 3272 uint8_t flags = get_float_exception_flags(&env->sse_status); 3273 /* 3274 * The MXCSR denormal flag has opposite semantics to 3275 * float_flag_input_denormal_flushed (the softfloat code sets that flag 3276 * only when flushing input denormals to zero, but SSE sets it 3277 * only when not flushing them to zero), so is not converted 3278 * here. 
3279 */ 3280 env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | 3281 (flags & float_flag_divbyzero ? FPUS_ZE : 0) | 3282 (flags & float_flag_overflow ? FPUS_OE : 0) | 3283 (flags & float_flag_underflow ? FPUS_UE : 0) | 3284 (flags & float_flag_inexact ? FPUS_PE : 0) | 3285 (flags & float_flag_output_denormal_flushed ? FPUS_UE | FPUS_PE : 3286 0)); 3287 } 3288 3289 void helper_update_mxcsr(CPUX86State *env) 3290 { 3291 update_mxcsr_from_sse_status(env); 3292 } 3293 3294 void helper_ldmxcsr(CPUX86State *env, uint32_t val) 3295 { 3296 cpu_set_mxcsr(env, val); 3297 } 3298 3299 void helper_enter_mmx(CPUX86State *env) 3300 { 3301 env->fpstt = 0; 3302 *(uint32_t *)(env->fptags) = 0; 3303 *(uint32_t *)(env->fptags + 4) = 0; 3304 } 3305 3306 void helper_emms(CPUX86State *env) 3307 { 3308 /* set to empty state */ 3309 *(uint32_t *)(env->fptags) = 0x01010101; 3310 *(uint32_t *)(env->fptags + 4) = 0x01010101; 3311 } 3312 3313 #define SHIFT 0 3314 #include "ops_sse.h" 3315 3316 #define SHIFT 1 3317 #include "ops_sse.h" 3318 3319 #define SHIFT 2 3320 #include "ops_sse.h" 3321