1 /* 2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include <math.h> 22 #include "cpu.h" 23 #include "tcg-cpu.h" 24 #include "exec/exec-all.h" 25 #include "exec/cpu_ldst.h" 26 #include "exec/helper-proto.h" 27 #include "fpu/softfloat.h" 28 #include "fpu/softfloat-macros.h" 29 #include "helper-tcg.h" 30 #include "access.h" 31 32 /* float macros */ 33 #define FT0 (env->ft0) 34 #define ST0 (env->fpregs[env->fpstt].d) 35 #define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) 36 #define ST1 ST(1) 37 38 #define FPU_RC_SHIFT 10 39 #define FPU_RC_MASK (3 << FPU_RC_SHIFT) 40 #define FPU_RC_NEAR 0x000 41 #define FPU_RC_DOWN 0x400 42 #define FPU_RC_UP 0x800 43 #define FPU_RC_CHOP 0xc00 44 45 #define MAXTAN 9223372036854775808.0 46 47 /* the following deal with x86 long double-precision numbers */ 48 #define MAXEXPD 0x7fff 49 #define EXPBIAS 16383 50 #define EXPD(fp) (fp.l.upper & 0x7fff) 51 #define SIGND(fp) ((fp.l.upper) & 0x8000) 52 #define MANTD(fp) (fp.l.lower) 53 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS 54 55 #define FPUS_IE (1 << 0) 56 #define FPUS_DE (1 << 1) 57 #define FPUS_ZE (1 << 2) 58 #define FPUS_OE (1 << 3) 59 #define FPUS_UE (1 << 4) 60 #define FPUS_PE (1 << 5) 61 #define FPUS_SF (1 << 6) 62 #define FPUS_SE (1 << 7) 63 #define FPUS_B (1 << 15) 64 65 #define FPUC_EM 0x3f 66 67 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) 68 #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL) 69 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) 70 #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL) 71 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) 72 #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL) 73 #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL) 74 #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL) 75 76 static inline void fpush(CPUX86State *env) 77 { 78 env->fpstt = (env->fpstt - 1) & 7; 79 env->fptags[env->fpstt] = 0; /* validate stack entry */ 80 } 81 82 static inline void fpop(CPUX86State *env) 83 { 84 env->fptags[env->fpstt] = 1; /* invalidate stack entry */ 85 env->fpstt = (env->fpstt + 1) & 7; 86 } 87 88 static floatx80 do_fldt(X86Access *ac, target_ulong ptr) 89 { 90 CPU_LDoubleU temp; 91 92 temp.l.lower = access_ldq(ac, ptr); 93 temp.l.upper = access_ldw(ac, ptr + 8); 94 return temp.d; 95 } 96 97 static void do_fstt(X86Access *ac, target_ulong ptr, floatx80 f) 98 { 99 CPU_LDoubleU temp; 100 101 temp.d = f; 102 access_stq(ac, ptr, temp.l.lower); 103 access_stw(ac, ptr + 8, temp.l.upper); 104 } 105 106 /* x87 FPU helpers */ 107 108 static inline double floatx80_to_double(CPUX86State *env, floatx80 a) 109 { 110 union { 111 float64 f64; 112 double d; 113 } u; 114 115 u.f64 = floatx80_to_float64(a, &env->fp_status); 116 return u.d; 117 } 118 119 static inline floatx80 double_to_floatx80(CPUX86State *env, double a) 120 { 121 union { 122 float64 f64; 123 double d; 124 } u; 125 126 u.d = a; 127 return float64_to_floatx80(u.f64, &env->fp_status); 128 } 129 130 static void fpu_set_exception(CPUX86State *env, int mask) 131 { 132 env->fpus |= mask; 133 if (env->fpus & (~env->fpuc & FPUC_EM)) { 134 env->fpus |= FPUS_SE | FPUS_B; 135 } 136 } 137 138 void cpu_init_fp_statuses(CPUX86State *env) 139 { 140 /* 141 * Initialise the non-runtime-varying fields of the various 142 * float_status words to x86 behaviour. This must be called at 143 * CPU reset because the float_status words are in the 144 * "zeroed on reset" portion of the CPU state struct. 145 * Fields in float_status that vary under guest control are set 146 * via the codepath for setting that register, eg cpu_set_fpuc(). 147 */ 148 /* 149 * Use x87 NaN propagation rules: 150 * SNaN + QNaN => return the QNaN 151 * two SNaNs => return the one with the larger significand, silenced 152 * two QNaNs => return the one with the larger significand 153 * SNaN and a non-NaN => return the SNaN, silenced 154 * QNaN and a non-NaN => return the QNaN 155 * 156 * If we get down to comparing significands and they are the same, 157 * return the NaN with the positive sign bit (if any). 158 */ 159 set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); 160 /* 161 * TODO: These are incorrect: the x86 Software Developer's Manual vol 1 162 * section 4.8.3.5 "Operating on SNaNs and QNaNs" says that the 163 * "larger significand" behaviour is only used for x87 FPU operations. 164 * For SSE the required behaviour is to always return the first NaN, 165 * which is float_2nan_prop_ab. 166 * 167 * mmx_status is used only for the AMD 3DNow! instructions, which 168 * are documented in the "3DNow! Technology Manual" as not supporting 169 * NaNs or infinities as inputs. The result of passing two NaNs is 170 * documented as "undefined", so we can do what we choose. 171 * (Strictly there is some behaviour we don't implement correctly 172 * for these "unsupported" NaN and Inf values, like "NaN * 0 == 0".) 173 */ 174 set_float_2nan_prop_rule(float_2nan_prop_x87, &env->mmx_status); 175 set_float_2nan_prop_rule(float_2nan_prop_x87, &env->sse_status); 176 /* 177 * Only SSE has multiply-add instructions. In the SDM Section 14.5.2 178 * "Fused-Multiply-ADD (FMA) Numeric Behavior" the NaN handling is 179 * specified -- for 0 * inf + NaN the input NaN is selected, and if 180 * there are multiple input NaNs they are selected in the order a, b, c. 181 * We also do not raise Invalid for the 0 * inf + (Q)NaN case. 182 */ 183 set_float_infzeronan_rule(float_infzeronan_dnan_never | 184 float_infzeronan_suppress_invalid, 185 &env->sse_status); 186 set_float_3nan_prop_rule(float_3nan_prop_abc, &env->sse_status); 187 /* Default NaN: sign bit set, most significant frac bit set */ 188 set_float_default_nan_pattern(0b11000000, &env->fp_status); 189 set_float_default_nan_pattern(0b11000000, &env->mmx_status); 190 set_float_default_nan_pattern(0b11000000, &env->sse_status); 191 /* 192 * TODO: x86 does flush-to-zero detection after rounding (the SDM 193 * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush 194 * when we detect underflow, which x86 does after rounding). 195 */ 196 set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); 197 set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status); 198 set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status); 199 } 200 201 static inline uint8_t save_exception_flags(CPUX86State *env) 202 { 203 uint8_t old_flags = get_float_exception_flags(&env->fp_status); 204 set_float_exception_flags(0, &env->fp_status); 205 return old_flags; 206 } 207 208 static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) 209 { 210 uint8_t new_flags = get_float_exception_flags(&env->fp_status); 211 float_raise(old_flags, &env->fp_status); 212 fpu_set_exception(env, 213 ((new_flags & float_flag_invalid ? FPUS_IE : 0) | 214 (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) | 215 (new_flags & float_flag_overflow ? FPUS_OE : 0) | 216 (new_flags & float_flag_underflow ? FPUS_UE : 0) | 217 (new_flags & float_flag_inexact ? FPUS_PE : 0) | 218 (new_flags & float_flag_input_denormal_flushed ? FPUS_DE : 0))); 219 } 220 221 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) 222 { 223 uint8_t old_flags = save_exception_flags(env); 224 floatx80 ret = floatx80_div(a, b, &env->fp_status); 225 merge_exception_flags(env, old_flags); 226 return ret; 227 } 228 229 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) 230 { 231 if (env->cr[0] & CR0_NE_MASK) { 232 raise_exception_ra(env, EXCP10_COPR, retaddr); 233 } 234 #if !defined(CONFIG_USER_ONLY) 235 else { 236 fpu_check_raise_ferr_irq(env); 237 } 238 #endif 239 } 240 241 void helper_flds_FT0(CPUX86State *env, uint32_t val) 242 { 243 uint8_t old_flags = save_exception_flags(env); 244 union { 245 float32 f; 246 uint32_t i; 247 } u; 248 249 u.i = val; 250 FT0 = float32_to_floatx80(u.f, &env->fp_status); 251 merge_exception_flags(env, old_flags); 252 } 253 254 void helper_fldl_FT0(CPUX86State *env, uint64_t val) 255 { 256 uint8_t old_flags = save_exception_flags(env); 257 union { 258 float64 f; 259 uint64_t i; 260 } u; 261 262 u.i = val; 263 FT0 = float64_to_floatx80(u.f, &env->fp_status); 264 merge_exception_flags(env, old_flags); 265 } 266 267 void helper_fildl_FT0(CPUX86State *env, int32_t val) 268 { 269 FT0 = int32_to_floatx80(val, &env->fp_status); 270 } 271 272 void helper_flds_ST0(CPUX86State *env, uint32_t val) 273 { 274 uint8_t old_flags = save_exception_flags(env); 275 int new_fpstt; 276 union { 277 float32 f; 278 uint32_t i; 279 } u; 280 281 new_fpstt = (env->fpstt - 1) & 7; 282 u.i = val; 283 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); 284 env->fpstt = new_fpstt; 285 env->fptags[new_fpstt] = 0; /* validate stack entry */ 286 merge_exception_flags(env, old_flags); 287 } 288 289 void helper_fldl_ST0(CPUX86State *env, uint64_t val) 290 { 291 uint8_t old_flags = save_exception_flags(env); 292 int new_fpstt; 293 union { 294 float64 f; 295 uint64_t i; 296 } u; 297 298 new_fpstt = (env->fpstt - 1) & 7; 299 u.i = val; 300 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); 301 env->fpstt = new_fpstt; 302 env->fptags[new_fpstt] = 0; /* validate stack entry */ 303 merge_exception_flags(env, old_flags); 304 } 305 306 static FloatX80RoundPrec tmp_maximise_precision(float_status *st) 307 { 308 FloatX80RoundPrec old = get_floatx80_rounding_precision(st); 309 set_floatx80_rounding_precision(floatx80_precision_x, st); 310 return old; 311 } 312 313 void helper_fildl_ST0(CPUX86State *env, int32_t val) 314 { 315 int new_fpstt; 316 FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status); 317 318 new_fpstt = (env->fpstt - 1) & 7; 319 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status); 320 env->fpstt = new_fpstt; 321 env->fptags[new_fpstt] = 0; /* validate stack entry */ 322 323 set_floatx80_rounding_precision(old, &env->fp_status); 324 } 325 326 void helper_fildll_ST0(CPUX86State *env, int64_t val) 327 { 328 int new_fpstt; 329 FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status); 330 331 new_fpstt = (env->fpstt - 1) & 7; 332 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status); 333 env->fpstt = new_fpstt; 334 env->fptags[new_fpstt] = 0; /* validate stack entry */ 335 336 set_floatx80_rounding_precision(old, &env->fp_status); 337 } 338 339 uint32_t helper_fsts_ST0(CPUX86State *env) 340 { 341 uint8_t old_flags = save_exception_flags(env); 342 union { 343 float32 f; 344 uint32_t i; 345 } u; 346 347 u.f = floatx80_to_float32(ST0, &env->fp_status); 348 merge_exception_flags(env, old_flags); 349 return u.i; 350 } 351 352 uint64_t helper_fstl_ST0(CPUX86State *env) 353 { 354 uint8_t old_flags = save_exception_flags(env); 355 union { 356 float64 f; 357 uint64_t i; 358 } u; 359 360 u.f = floatx80_to_float64(ST0, &env->fp_status); 361 merge_exception_flags(env, old_flags); 362 return u.i; 363 } 364 365 int32_t helper_fist_ST0(CPUX86State *env) 366 { 367 uint8_t old_flags = save_exception_flags(env); 368 int32_t val; 369 370 val = floatx80_to_int32(ST0, &env->fp_status); 371 if (val != (int16_t)val) { 372 set_float_exception_flags(float_flag_invalid, &env->fp_status); 373 val = -32768; 374 } 375 merge_exception_flags(env, old_flags); 376 return val; 377 } 378 379 int32_t helper_fistl_ST0(CPUX86State *env) 380 { 381 uint8_t old_flags = save_exception_flags(env); 382 int32_t val; 383 384 val = floatx80_to_int32(ST0, &env->fp_status); 385 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 386 val = 0x80000000; 387 } 388 merge_exception_flags(env, old_flags); 389 return val; 390 } 391 392 int64_t helper_fistll_ST0(CPUX86State *env) 393 { 394 uint8_t old_flags = save_exception_flags(env); 395 int64_t val; 396 397 val = floatx80_to_int64(ST0, &env->fp_status); 398 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 399 val = 0x8000000000000000ULL; 400 } 401 merge_exception_flags(env, old_flags); 402 return val; 403 } 404 405 int32_t helper_fistt_ST0(CPUX86State *env) 406 { 407 uint8_t old_flags = save_exception_flags(env); 408 int32_t val; 409 410 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 411 if (val != (int16_t)val) { 412 set_float_exception_flags(float_flag_invalid, &env->fp_status); 413 val = -32768; 414 } 415 merge_exception_flags(env, old_flags); 416 return val; 417 } 418 419 int32_t helper_fisttl_ST0(CPUX86State *env) 420 { 421 uint8_t old_flags = save_exception_flags(env); 422 int32_t val; 423 424 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 425 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 426 val = 0x80000000; 427 } 428 merge_exception_flags(env, old_flags); 429 return val; 430 } 431 432 int64_t helper_fisttll_ST0(CPUX86State *env) 433 { 434 uint8_t old_flags = save_exception_flags(env); 435 int64_t val; 436 437 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); 438 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 439 val = 0x8000000000000000ULL; 440 } 441 merge_exception_flags(env, old_flags); 442 return val; 443 } 444 445 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) 446 { 447 int new_fpstt; 448 X86Access ac; 449 450 access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC()); 451 452 new_fpstt = (env->fpstt - 1) & 7; 453 env->fpregs[new_fpstt].d = do_fldt(&ac, ptr); 454 env->fpstt = new_fpstt; 455 env->fptags[new_fpstt] = 0; /* validate stack entry */ 456 } 457 458 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr) 459 { 460 X86Access ac; 461 462 access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC()); 463 do_fstt(&ac, ptr, ST0); 464 } 465 466 void helper_fpush(CPUX86State *env) 467 { 468 fpush(env); 469 } 470 471 void helper_fpop(CPUX86State *env) 472 { 473 fpop(env); 474 } 475 476 void helper_fdecstp(CPUX86State *env) 477 { 478 env->fpstt = (env->fpstt - 1) & 7; 479 env->fpus &= ~0x4700; 480 } 481 482 void helper_fincstp(CPUX86State *env) 483 { 484 env->fpstt = (env->fpstt + 1) & 7; 485 env->fpus &= ~0x4700; 486 } 487 488 /* FPU move */ 489 490 void helper_ffree_STN(CPUX86State *env, int st_index) 491 { 492 env->fptags[(env->fpstt + st_index) & 7] = 1; 493 } 494 495 void helper_fmov_ST0_FT0(CPUX86State *env) 496 { 497 ST0 = FT0; 498 } 499 500 void helper_fmov_FT0_STN(CPUX86State *env, int st_index) 501 { 502 FT0 = ST(st_index); 503 } 504 505 void helper_fmov_ST0_STN(CPUX86State *env, int st_index) 506 { 507 ST0 = ST(st_index); 508 } 509 510 void helper_fmov_STN_ST0(CPUX86State *env, int st_index) 511 { 512 ST(st_index) = ST0; 513 } 514 515 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index) 516 { 517 floatx80 tmp; 518 519 tmp = ST(st_index); 520 ST(st_index) = ST0; 521 ST0 = tmp; 522 } 523 524 /* FPU operations */ 525 526 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; 527 528 void helper_fcom_ST0_FT0(CPUX86State *env) 529 { 530 uint8_t old_flags = save_exception_flags(env); 531 FloatRelation ret; 532 533 ret = floatx80_compare(ST0, FT0, &env->fp_status); 534 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 535 merge_exception_flags(env, old_flags); 536 } 537 538 void helper_fucom_ST0_FT0(CPUX86State *env) 539 { 540 uint8_t old_flags = save_exception_flags(env); 541 FloatRelation ret; 542 543 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 544 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 545 merge_exception_flags(env, old_flags); 546 } 547 548 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; 549 550 void helper_fcomi_ST0_FT0(CPUX86State *env) 551 { 552 uint8_t old_flags = save_exception_flags(env); 553 int eflags; 554 FloatRelation ret; 555 556 ret = floatx80_compare(ST0, FT0, &env->fp_status); 557 eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); 558 CC_SRC = eflags | fcomi_ccval[ret + 1]; 559 CC_OP = CC_OP_EFLAGS; 560 merge_exception_flags(env, old_flags); 561 } 562 563 void helper_fucomi_ST0_FT0(CPUX86State *env) 564 { 565 uint8_t old_flags = save_exception_flags(env); 566 int eflags; 567 FloatRelation ret; 568 569 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 570 eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); 571 CC_SRC = eflags | fcomi_ccval[ret + 1]; 572 CC_OP = CC_OP_EFLAGS; 573 merge_exception_flags(env, old_flags); 574 } 575 576 void helper_fadd_ST0_FT0(CPUX86State *env) 577 { 578 uint8_t old_flags = save_exception_flags(env); 579 ST0 = floatx80_add(ST0, FT0, &env->fp_status); 580 merge_exception_flags(env, old_flags); 581 } 582 583 void helper_fmul_ST0_FT0(CPUX86State *env) 584 { 585 uint8_t old_flags = save_exception_flags(env); 586 ST0 = floatx80_mul(ST0, FT0, &env->fp_status); 587 merge_exception_flags(env, old_flags); 588 } 589 590 void helper_fsub_ST0_FT0(CPUX86State *env) 591 { 592 uint8_t old_flags = save_exception_flags(env); 593 ST0 = floatx80_sub(ST0, FT0, &env->fp_status); 594 merge_exception_flags(env, old_flags); 595 } 596 597 void helper_fsubr_ST0_FT0(CPUX86State *env) 598 { 599 uint8_t old_flags = save_exception_flags(env); 600 ST0 = floatx80_sub(FT0, ST0, &env->fp_status); 601 merge_exception_flags(env, old_flags); 602 } 603 604 void helper_fdiv_ST0_FT0(CPUX86State *env) 605 { 606 ST0 = helper_fdiv(env, ST0, FT0); 607 } 608 609 void helper_fdivr_ST0_FT0(CPUX86State *env) 610 { 611 ST0 = helper_fdiv(env, FT0, ST0); 612 } 613 614 /* fp operations between STN and ST0 */ 615 616 void helper_fadd_STN_ST0(CPUX86State *env, int st_index) 617 { 618 uint8_t old_flags = save_exception_flags(env); 619 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); 620 merge_exception_flags(env, old_flags); 621 } 622 623 void helper_fmul_STN_ST0(CPUX86State *env, int st_index) 624 { 625 uint8_t old_flags = save_exception_flags(env); 626 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); 627 merge_exception_flags(env, old_flags); 628 } 629 630 void helper_fsub_STN_ST0(CPUX86State *env, int st_index) 631 { 632 uint8_t old_flags = save_exception_flags(env); 633 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); 634 merge_exception_flags(env, old_flags); 635 } 636 637 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) 638 { 639 uint8_t old_flags = save_exception_flags(env); 640 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); 641 merge_exception_flags(env, old_flags); 642 } 643 644 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index) 645 { 646 floatx80 *p; 647 648 p = &ST(st_index); 649 *p = helper_fdiv(env, *p, ST0); 650 } 651 652 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index) 653 { 654 floatx80 *p; 655 656 p = &ST(st_index); 657 *p = helper_fdiv(env, ST0, *p); 658 } 659 660 /* misc FPU operations */ 661 void helper_fchs_ST0(CPUX86State *env) 662 { 663 ST0 = floatx80_chs(ST0); 664 } 665 666 void helper_fabs_ST0(CPUX86State *env) 667 { 668 ST0 = floatx80_abs(ST0); 669 } 670 671 void helper_fld1_ST0(CPUX86State *env) 672 { 673 ST0 = floatx80_one; 674 } 675 676 void helper_fldl2t_ST0(CPUX86State *env) 677 { 678 switch (env->fpuc & FPU_RC_MASK) { 679 case FPU_RC_UP: 680 ST0 = floatx80_l2t_u; 681 break; 682 default: 683 ST0 = floatx80_l2t; 684 break; 685 } 686 } 687 688 void helper_fldl2e_ST0(CPUX86State *env) 689 { 690 switch (env->fpuc & FPU_RC_MASK) { 691 case FPU_RC_DOWN: 692 case FPU_RC_CHOP: 693 ST0 = floatx80_l2e_d; 694 break; 695 default: 696 ST0 = floatx80_l2e; 697 break; 698 } 699 } 700 701 void helper_fldpi_ST0(CPUX86State *env) 702 { 703 switch (env->fpuc & FPU_RC_MASK) { 704 case FPU_RC_DOWN: 705 case FPU_RC_CHOP: 706 ST0 = floatx80_pi_d; 707 break; 708 default: 709 ST0 = floatx80_pi; 710 break; 711 } 712 } 713 714 void helper_fldlg2_ST0(CPUX86State *env) 715 { 716 switch (env->fpuc & FPU_RC_MASK) { 717 case FPU_RC_DOWN: 718 case FPU_RC_CHOP: 719 ST0 = floatx80_lg2_d; 720 break; 721 default: 722 ST0 = floatx80_lg2; 723 break; 724 } 725 } 726 727 void helper_fldln2_ST0(CPUX86State *env) 728 { 729 switch (env->fpuc & FPU_RC_MASK) { 730 case FPU_RC_DOWN: 731 case FPU_RC_CHOP: 732 ST0 = floatx80_ln2_d; 733 break; 734 default: 735 ST0 = floatx80_ln2; 736 break; 737 } 738 } 739 740 void helper_fldz_ST0(CPUX86State *env) 741 { 742 ST0 = floatx80_zero; 743 } 744 745 void helper_fldz_FT0(CPUX86State *env) 746 { 747 FT0 = floatx80_zero; 748 } 749 750 uint32_t helper_fnstsw(CPUX86State *env) 751 { 752 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 753 } 754 755 uint32_t helper_fnstcw(CPUX86State *env) 756 { 757 return env->fpuc; 758 } 759 760 static void set_x86_rounding_mode(unsigned mode, float_status *status) 761 { 762 static FloatRoundMode x86_round_mode[4] = { 763 float_round_nearest_even, 764 float_round_down, 765 float_round_up, 766 float_round_to_zero 767 }; 768 assert(mode < ARRAY_SIZE(x86_round_mode)); 769 set_float_rounding_mode(x86_round_mode[mode], status); 770 } 771 772 void update_fp_status(CPUX86State *env) 773 { 774 int rnd_mode; 775 FloatX80RoundPrec rnd_prec; 776 777 /* set rounding mode */ 778 rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT; 779 set_x86_rounding_mode(rnd_mode, &env->fp_status); 780 781 switch ((env->fpuc >> 8) & 3) { 782 case 0: 783 rnd_prec = floatx80_precision_s; 784 break; 785 case 2: 786 rnd_prec = floatx80_precision_d; 787 break; 788 case 3: 789 default: 790 rnd_prec = floatx80_precision_x; 791 break; 792 } 793 set_floatx80_rounding_precision(rnd_prec, &env->fp_status); 794 } 795 796 void helper_fldcw(CPUX86State *env, uint32_t val) 797 { 798 cpu_set_fpuc(env, val); 799 } 800 801 void helper_fclex(CPUX86State *env) 802 { 803 env->fpus &= 0x7f00; 804 } 805 806 void helper_fwait(CPUX86State *env) 807 { 808 if (env->fpus & FPUS_SE) { 809 fpu_raise_exception(env, GETPC()); 810 } 811 } 812 813 static void do_fninit(CPUX86State *env) 814 { 815 env->fpus = 0; 816 env->fpstt = 0; 817 env->fpcs = 0; 818 env->fpds = 0; 819 env->fpip = 0; 820 env->fpdp = 0; 821 cpu_set_fpuc(env, 0x37f); 822 env->fptags[0] = 1; 823 env->fptags[1] = 1; 824 env->fptags[2] = 1; 825 env->fptags[3] = 1; 826 env->fptags[4] = 1; 827 env->fptags[5] = 1; 828 env->fptags[6] = 1; 829 env->fptags[7] = 1; 830 } 831 832 void helper_fninit(CPUX86State *env) 833 { 834 do_fninit(env); 835 } 836 837 /* BCD ops */ 838 839 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) 840 { 841 X86Access ac; 842 floatx80 tmp; 843 uint64_t val; 844 unsigned int v; 845 int i; 846 847 access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC()); 848 849 val = 0; 850 for (i = 8; i >= 0; i--) { 851 v = access_ldb(&ac, ptr + i); 852 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); 853 } 854 tmp = int64_to_floatx80(val, &env->fp_status); 855 if (access_ldb(&ac, ptr + 9) & 0x80) { 856 tmp = floatx80_chs(tmp); 857 } 858 fpush(env); 859 ST0 = tmp; 860 } 861 862 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) 863 { 864 uint8_t old_flags = save_exception_flags(env); 865 int v; 866 target_ulong mem_ref, mem_end; 867 int64_t val; 868 CPU_LDoubleU temp; 869 X86Access ac; 870 871 access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC()); 872 temp.d = ST0; 873 874 val = floatx80_to_int64(ST0, &env->fp_status); 875 mem_ref = ptr; 876 if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) { 877 set_float_exception_flags(float_flag_invalid, &env->fp_status); 878 while (mem_ref < ptr + 7) { 879 access_stb(&ac, mem_ref++, 0); 880 } 881 access_stb(&ac, mem_ref++, 0xc0); 882 access_stb(&ac, mem_ref++, 0xff); 883 access_stb(&ac, mem_ref++, 0xff); 884 merge_exception_flags(env, old_flags); 885 return; 886 } 887 mem_end = mem_ref + 9; 888 if (SIGND(temp)) { 889 access_stb(&ac, mem_end, 0x80); 890 val = -val; 891 } else { 892 access_stb(&ac, mem_end, 0x00); 893 } 894 while (mem_ref < mem_end) { 895 if (val == 0) { 896 break; 897 } 898 v = val % 100; 899 val = val / 100; 900 v = ((v / 10) << 4) | (v % 10); 901 access_stb(&ac, mem_ref++, v); 902 } 903 while (mem_ref < mem_end) { 904 access_stb(&ac, mem_ref++, 0); 905 } 906 merge_exception_flags(env, old_flags); 907 } 908 909 /* 128-bit significand of log(2). */ 910 #define ln2_sig_high 0xb17217f7d1cf79abULL 911 #define ln2_sig_low 0xc9e3b39803f2f6afULL 912 913 /* 914 * Polynomial coefficients for an approximation to (2^x - 1) / x, on 915 * the interval [-1/64, 1/64]. 916 */ 917 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL) 918 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL) 919 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL) 920 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL) 921 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL) 922 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL) 923 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL) 924 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL) 925 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL) 926 927 struct f2xm1_data { 928 /* 929 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1 930 * are very close to exact floatx80 values. 931 */ 932 floatx80 t; 933 /* The value of 2^t. */ 934 floatx80 exp2; 935 /* The value of 2^t - 1. */ 936 floatx80 exp2m1; 937 }; 938 939 static const struct f2xm1_data f2xm1_table[65] = { 940 { make_floatx80_init(0xbfff, 0x8000000000000000ULL), 941 make_floatx80_init(0x3ffe, 0x8000000000000000ULL), 942 make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, 943 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), 944 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), 945 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, 946 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), 947 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), 948 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, 949 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), 950 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), 951 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, 952 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), 953 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), 954 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, 955 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), 956 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), 957 make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) }, 958 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), 959 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), 960 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, 961 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), 962 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), 963 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, 964 { make_floatx80_init(0xbffe, 0xc000000000006530ULL), 965 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), 966 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, 967 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), 968 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), 969 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, 970 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), 971 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), 972 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, 973 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), 974 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), 975 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, 976 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), 977 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), 978 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, 979 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), 980 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), 981 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, 982 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), 983 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), 984 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, 985 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), 986 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), 987 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, 988 { make_floatx80_init(0xbffe, 0x800000000000227dULL), 989 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), 990 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, 991 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), 992 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), 993 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, 994 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), 995 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), 996 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, 997 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), 998 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), 999 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, 1000 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), 1001 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), 1002 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, 1003 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL), 1004 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), 1005 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, 1006 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), 1007 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), 1008 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, 1009 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), 1010 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL), 1011 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, 1012 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), 1013 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), 1014 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, 1015 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), 1016 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), 1017 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, 1018 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), 1019 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), 1020 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, 1021 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), 1022 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), 1023 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, 1024 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), 1025 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), 1026 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, 1027 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL), 1028 make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), 1029 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, 1030 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), 1031 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), 1032 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, 1033 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), 1034 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), 1035 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, 1036 { floatx80_zero_init, 1037 make_floatx80_init(0x3fff, 0x8000000000000000ULL), 1038 floatx80_zero_init }, 1039 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), 1040 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), 1041 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, 1042 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), 1043 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), 1044 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) }, 1045 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), 1046 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), 1047 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, 1048 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), 1049 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), 1050 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, 1051 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), 1052 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), 1053 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, 1054 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), 1055 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), 1056 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, 1057 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), 1058 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), 1059 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, 1060 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), 1061 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), 1062 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, 1063 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), 1064 make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL), 1065 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, 1066 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), 1067 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), 1068 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, 1069 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), 1070 make_floatx80_init(0x3fff, 0xa27043030c49370aULL), 1071 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, 1072 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), 1073 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), 1074 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, 1075 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), 1076 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), 1077 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, 1078 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), 1079 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), 1080 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, 1081 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), 1082 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), 1083 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, 1084 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL), 1085 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), 1086 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, 1087 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), 1088 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), 1089 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, 1090 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), 1091 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), 1092 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, 1093 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), 1094 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), 1095 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, 1096 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), 1097 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), 1098 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) }, 1099 { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL), 1100 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), 1101 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, 1102 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), 1103 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), 1104 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, 1105 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), 1106 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), 1107 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, 1108 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), 1109 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), 1110 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, 1111 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), 1112 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), 1113 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, 1114 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), 1115 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL), 1116 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) }, 1117 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL), 1118 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL), 1119 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) }, 1120 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL), 1121 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL), 1122 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) }, 1123 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL), 1124 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL), 1125 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) }, 1126 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL), 1127 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL), 1128 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) }, 1129 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL), 1130 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL), 1131 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) }, 1132 { make_floatx80_init(0x3fff, 0x8000000000000000ULL), 1133 make_floatx80_init(0x4000, 0x8000000000000000ULL), 1134 make_floatx80_init(0x3fff, 0x8000000000000000ULL) }, 1135 }; 1136 1137 void helper_f2xm1(CPUX86State *env) 1138 { 1139 uint8_t old_flags = save_exception_flags(env); 1140 uint64_t sig = extractFloatx80Frac(ST0); 1141 int32_t exp = extractFloatx80Exp(ST0); 1142 bool sign = extractFloatx80Sign(ST0); 1143 1144 if (floatx80_invalid_encoding(ST0)) { 1145 float_raise(float_flag_invalid, &env->fp_status); 1146 ST0 = floatx80_default_nan(&env->fp_status); 1147 } else if (floatx80_is_any_nan(ST0)) { 1148 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1149 float_raise(float_flag_invalid, &env->fp_status); 1150 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1151 } 1152 } else if (exp > 0x3fff || 1153 (exp == 0x3fff && sig != (0x8000000000000000ULL))) { 1154 /* Out of range for the instruction, treat as invalid. */ 1155 float_raise(float_flag_invalid, &env->fp_status); 1156 ST0 = floatx80_default_nan(&env->fp_status); 1157 } else if (exp == 0x3fff) { 1158 /* Argument 1 or -1, exact result 1 or -0.5. */ 1159 if (sign) { 1160 ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL); 1161 } 1162 } else if (exp < 0x3fb0) { 1163 if (!floatx80_is_zero(ST0)) { 1164 /* 1165 * Multiplying the argument by an extra-precision version 1166 * of log(2) is sufficiently precise. Zero arguments are 1167 * returned unchanged. 1168 */ 1169 uint64_t sig0, sig1, sig2; 1170 if (exp == 0) { 1171 normalizeFloatx80Subnormal(sig, &exp, &sig); 1172 } 1173 mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1, 1174 &sig2); 1175 /* This result is inexact. */ 1176 sig1 |= 1; 1177 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1178 sign, exp, sig0, sig1, 1179 &env->fp_status); 1180 } 1181 } else { 1182 floatx80 tmp, y, accum; 1183 bool asign, bsign; 1184 int32_t n, aexp, bexp; 1185 uint64_t asig0, asig1, asig2, bsig0, bsig1; 1186 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 1187 FloatX80RoundPrec save_prec = 1188 env->fp_status.floatx80_rounding_precision; 1189 env->fp_status.float_rounding_mode = float_round_nearest_even; 1190 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 1191 1192 /* Find the nearest multiple of 1/32 to the argument. */ 1193 tmp = floatx80_scalbn(ST0, 5, &env->fp_status); 1194 n = 32 + floatx80_to_int32(tmp, &env->fp_status); 1195 y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status); 1196 1197 if (floatx80_is_zero(y)) { 1198 /* 1199 * Use the value of 2^t - 1 from the table, to avoid 1200 * needing to special-case zero as a result of 1201 * multiplication below. 1202 */ 1203 ST0 = f2xm1_table[n].t; 1204 set_float_exception_flags(float_flag_inexact, &env->fp_status); 1205 env->fp_status.float_rounding_mode = save_mode; 1206 } else { 1207 /* 1208 * Compute the lower parts of a polynomial expansion for 1209 * (2^y - 1) / y. 1210 */ 1211 accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status); 1212 accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status); 1213 accum = floatx80_mul(accum, y, &env->fp_status); 1214 accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status); 1215 accum = floatx80_mul(accum, y, &env->fp_status); 1216 accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status); 1217 accum = floatx80_mul(accum, y, &env->fp_status); 1218 accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status); 1219 accum = floatx80_mul(accum, y, &env->fp_status); 1220 accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status); 1221 accum = floatx80_mul(accum, y, &env->fp_status); 1222 accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status); 1223 accum = floatx80_mul(accum, y, &env->fp_status); 1224 accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status); 1225 1226 /* 1227 * The full polynomial expansion is f2xm1_coeff_0 + accum 1228 * (where accum has much lower magnitude, and so, in 1229 * particular, carry out of the addition is not possible). 1230 * (This expansion is only accurate to about 70 bits, not 1231 * 128 bits.) 1232 */ 1233 aexp = extractFloatx80Exp(f2xm1_coeff_0); 1234 asign = extractFloatx80Sign(f2xm1_coeff_0); 1235 shift128RightJamming(extractFloatx80Frac(accum), 0, 1236 aexp - extractFloatx80Exp(accum), 1237 &asig0, &asig1); 1238 bsig0 = extractFloatx80Frac(f2xm1_coeff_0); 1239 bsig1 = 0; 1240 if (asign == extractFloatx80Sign(accum)) { 1241 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1242 } else { 1243 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1244 } 1245 /* And thus compute an approximation to 2^y - 1. */ 1246 mul128By64To192(asig0, asig1, extractFloatx80Frac(y), 1247 &asig0, &asig1, &asig2); 1248 aexp += extractFloatx80Exp(y) - 0x3ffe; 1249 asign ^= extractFloatx80Sign(y); 1250 if (n != 32) { 1251 /* 1252 * Multiply this by the precomputed value of 2^t and 1253 * add that of 2^t - 1. 1254 */ 1255 mul128By64To192(asig0, asig1, 1256 extractFloatx80Frac(f2xm1_table[n].exp2), 1257 &asig0, &asig1, &asig2); 1258 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe; 1259 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1); 1260 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1); 1261 bsig1 = 0; 1262 if (bexp < aexp) { 1263 shift128RightJamming(bsig0, bsig1, aexp - bexp, 1264 &bsig0, &bsig1); 1265 } else if (aexp < bexp) { 1266 shift128RightJamming(asig0, asig1, bexp - aexp, 1267 &asig0, &asig1); 1268 aexp = bexp; 1269 } 1270 /* The sign of 2^t - 1 is always that of the result. */ 1271 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1); 1272 if (asign == bsign) { 1273 /* Avoid possible carry out of the addition. */ 1274 shift128RightJamming(asig0, asig1, 1, 1275 &asig0, &asig1); 1276 shift128RightJamming(bsig0, bsig1, 1, 1277 &bsig0, &bsig1); 1278 ++aexp; 1279 add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1); 1280 } else { 1281 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1282 asign = bsign; 1283 } 1284 } 1285 env->fp_status.float_rounding_mode = save_mode; 1286 /* This result is inexact. */ 1287 asig1 |= 1; 1288 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1289 asign, aexp, asig0, asig1, 1290 &env->fp_status); 1291 } 1292 1293 env->fp_status.floatx80_rounding_precision = save_prec; 1294 } 1295 merge_exception_flags(env, old_flags); 1296 } 1297 1298 void helper_fptan(CPUX86State *env) 1299 { 1300 double fptemp = floatx80_to_double(env, ST0); 1301 1302 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 1303 env->fpus |= 0x400; 1304 } else { 1305 fptemp = tan(fptemp); 1306 ST0 = double_to_floatx80(env, fptemp); 1307 fpush(env); 1308 ST0 = floatx80_one; 1309 env->fpus &= ~0x400; /* C2 <-- 0 */ 1310 /* the above code is for |arg| < 2**52 only */ 1311 } 1312 } 1313 1314 /* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. */ 1315 #define pi_4_exp 0x3ffe 1316 #define pi_4_sig_high 0xc90fdaa22168c234ULL 1317 #define pi_4_sig_low 0xc4c6628b80dc1cd1ULL 1318 #define pi_2_exp 0x3fff 1319 #define pi_2_sig_high 0xc90fdaa22168c234ULL 1320 #define pi_2_sig_low 0xc4c6628b80dc1cd1ULL 1321 #define pi_34_exp 0x4000 1322 #define pi_34_sig_high 0x96cbe3f9990e91a7ULL 1323 #define pi_34_sig_low 0x9394c9e8a0a5159dULL 1324 #define pi_exp 0x4000 1325 #define pi_sig_high 0xc90fdaa22168c234ULL 1326 #define pi_sig_low 0xc4c6628b80dc1cd1ULL 1327 1328 /* 1329 * Polynomial coefficients for an approximation to atan(x), with only 1330 * odd powers of x used, for x in the interval [-1/16, 1/16]. (Unlike 1331 * for some other approximations, no low part is needed for the first 1332 * coefficient here to achieve a sufficiently accurate result, because 1333 * the coefficient in this minimax approximation is very close to 1334 * exactly 1.) 1335 */ 1336 #define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL) 1337 #define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL) 1338 #define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL) 1339 #define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL) 1340 #define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL) 1341 #define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL) 1342 #define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL) 1343 1344 struct fpatan_data { 1345 /* High and low parts of atan(x). */ 1346 floatx80 atan_high, atan_low; 1347 }; 1348 1349 static const struct fpatan_data fpatan_table[9] = { 1350 { floatx80_zero_init, 1351 floatx80_zero_init }, 1352 { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL), 1353 make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) }, 1354 { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL), 1355 make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) }, 1356 { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL), 1357 make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) }, 1358 { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL), 1359 make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) }, 1360 { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL), 1361 make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) }, 1362 { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL), 1363 make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) }, 1364 { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL), 1365 make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) }, 1366 { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL), 1367 make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) }, 1368 }; 1369 1370 void helper_fpatan(CPUX86State *env) 1371 { 1372 uint8_t old_flags = save_exception_flags(env); 1373 uint64_t arg0_sig = extractFloatx80Frac(ST0); 1374 int32_t arg0_exp = extractFloatx80Exp(ST0); 1375 bool arg0_sign = extractFloatx80Sign(ST0); 1376 uint64_t arg1_sig = extractFloatx80Frac(ST1); 1377 int32_t arg1_exp = extractFloatx80Exp(ST1); 1378 bool arg1_sign = extractFloatx80Sign(ST1); 1379 1380 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1381 float_raise(float_flag_invalid, &env->fp_status); 1382 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 1383 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 1384 float_raise(float_flag_invalid, &env->fp_status); 1385 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 1386 } else if (floatx80_invalid_encoding(ST0) || 1387 floatx80_invalid_encoding(ST1)) { 1388 float_raise(float_flag_invalid, &env->fp_status); 1389 ST1 = floatx80_default_nan(&env->fp_status); 1390 } else if (floatx80_is_any_nan(ST0)) { 1391 ST1 = ST0; 1392 } else if (floatx80_is_any_nan(ST1)) { 1393 /* Pass this NaN through. */ 1394 } else if (floatx80_is_zero(ST1) && !arg0_sign) { 1395 /* Pass this zero through. */ 1396 } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) || 1397 arg0_exp - arg1_exp >= 80) && 1398 !arg0_sign) { 1399 /* 1400 * Dividing ST1 by ST0 gives the correct result up to 1401 * rounding, and avoids spurious underflow exceptions that 1402 * might result from passing some small values through the 1403 * polynomial approximation, but if a finite nonzero result of 1404 * division is exact, the result of fpatan is still inexact 1405 * (and underflowing where appropriate). 1406 */ 1407 FloatX80RoundPrec save_prec = 1408 env->fp_status.floatx80_rounding_precision; 1409 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 1410 ST1 = floatx80_div(ST1, ST0, &env->fp_status); 1411 env->fp_status.floatx80_rounding_precision = save_prec; 1412 if (!floatx80_is_zero(ST1) && 1413 !(get_float_exception_flags(&env->fp_status) & 1414 float_flag_inexact)) { 1415 /* 1416 * The mathematical result is very slightly closer to zero 1417 * than this exact result. Round a value with the 1418 * significand adjusted accordingly to get the correct 1419 * exceptions, and possibly an adjusted result depending 1420 * on the rounding mode. 1421 */ 1422 uint64_t sig = extractFloatx80Frac(ST1); 1423 int32_t exp = extractFloatx80Exp(ST1); 1424 bool sign = extractFloatx80Sign(ST1); 1425 if (exp == 0) { 1426 normalizeFloatx80Subnormal(sig, &exp, &sig); 1427 } 1428 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1429 sign, exp, sig - 1, 1430 -1, &env->fp_status); 1431 } 1432 } else { 1433 /* The result is inexact. */ 1434 bool rsign = arg1_sign; 1435 int32_t rexp; 1436 uint64_t rsig0, rsig1; 1437 if (floatx80_is_zero(ST1)) { 1438 /* 1439 * ST0 is negative. The result is pi with the sign of 1440 * ST1. 1441 */ 1442 rexp = pi_exp; 1443 rsig0 = pi_sig_high; 1444 rsig1 = pi_sig_low; 1445 } else if (floatx80_is_infinity(ST1)) { 1446 if (floatx80_is_infinity(ST0)) { 1447 if (arg0_sign) { 1448 rexp = pi_34_exp; 1449 rsig0 = pi_34_sig_high; 1450 rsig1 = pi_34_sig_low; 1451 } else { 1452 rexp = pi_4_exp; 1453 rsig0 = pi_4_sig_high; 1454 rsig1 = pi_4_sig_low; 1455 } 1456 } else { 1457 rexp = pi_2_exp; 1458 rsig0 = pi_2_sig_high; 1459 rsig1 = pi_2_sig_low; 1460 } 1461 } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) { 1462 rexp = pi_2_exp; 1463 rsig0 = pi_2_sig_high; 1464 rsig1 = pi_2_sig_low; 1465 } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) { 1466 /* ST0 is negative. */ 1467 rexp = pi_exp; 1468 rsig0 = pi_sig_high; 1469 rsig1 = pi_sig_low; 1470 } else { 1471 /* 1472 * ST0 and ST1 are finite, nonzero and with exponents not 1473 * too far apart. 1474 */ 1475 int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp; 1476 int32_t azexp, axexp; 1477 bool adj_sub, ysign, zsign; 1478 uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1; 1479 uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2; 1480 uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1; 1481 uint64_t azsig0, azsig1; 1482 uint64_t azsig2, azsig3, axsig0, axsig1; 1483 floatx80 x8; 1484 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 1485 FloatX80RoundPrec save_prec = 1486 env->fp_status.floatx80_rounding_precision; 1487 env->fp_status.float_rounding_mode = float_round_nearest_even; 1488 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 1489 1490 if (arg0_exp == 0) { 1491 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 1492 } 1493 if (arg1_exp == 0) { 1494 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 1495 } 1496 if (arg0_exp > arg1_exp || 1497 (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) { 1498 /* Work with abs(ST1) / abs(ST0). */ 1499 num_exp = arg1_exp; 1500 num_sig = arg1_sig; 1501 den_exp = arg0_exp; 1502 den_sig = arg0_sig; 1503 if (arg0_sign) { 1504 /* The result is subtracted from pi. */ 1505 adj_exp = pi_exp; 1506 adj_sig0 = pi_sig_high; 1507 adj_sig1 = pi_sig_low; 1508 adj_sub = true; 1509 } else { 1510 /* The result is used as-is. */ 1511 adj_exp = 0; 1512 adj_sig0 = 0; 1513 adj_sig1 = 0; 1514 adj_sub = false; 1515 } 1516 } else { 1517 /* Work with abs(ST0) / abs(ST1). */ 1518 num_exp = arg0_exp; 1519 num_sig = arg0_sig; 1520 den_exp = arg1_exp; 1521 den_sig = arg1_sig; 1522 /* The result is added to or subtracted from pi/2. */ 1523 adj_exp = pi_2_exp; 1524 adj_sig0 = pi_2_sig_high; 1525 adj_sig1 = pi_2_sig_low; 1526 adj_sub = !arg0_sign; 1527 } 1528 1529 /* 1530 * Compute x = num/den, where 0 < x <= 1 and x is not too 1531 * small. 1532 */ 1533 xexp = num_exp - den_exp + 0x3ffe; 1534 remsig0 = num_sig; 1535 remsig1 = 0; 1536 if (den_sig <= remsig0) { 1537 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); 1538 ++xexp; 1539 } 1540 xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig); 1541 mul64To128(den_sig, xsig0, &msig0, &msig1); 1542 sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1); 1543 while ((int64_t) remsig0 < 0) { 1544 --xsig0; 1545 add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1); 1546 } 1547 xsig1 = estimateDiv128To64(remsig1, 0, den_sig); 1548 /* 1549 * No need to correct any estimation error in xsig1; even 1550 * with such error, it is accurate enough. 1551 */ 1552 1553 /* 1554 * Split x as x = t + y, where t = n/8 is the nearest 1555 * multiple of 1/8 to x. 1556 */ 1557 x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1558 false, xexp + 3, xsig0, 1559 xsig1, &env->fp_status); 1560 n = floatx80_to_int32(x8, &env->fp_status); 1561 if (n == 0) { 1562 ysign = false; 1563 yexp = xexp; 1564 ysig0 = xsig0; 1565 ysig1 = xsig1; 1566 texp = 0; 1567 tsig = 0; 1568 } else { 1569 int shift = clz32(n) + 32; 1570 texp = 0x403b - shift; 1571 tsig = n; 1572 tsig <<= shift; 1573 if (texp == xexp) { 1574 sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1); 1575 if ((int64_t) ysig0 >= 0) { 1576 ysign = false; 1577 if (ysig0 == 0) { 1578 if (ysig1 == 0) { 1579 yexp = 0; 1580 } else { 1581 shift = clz64(ysig1) + 64; 1582 yexp = xexp - shift; 1583 shift128Left(ysig0, ysig1, shift, 1584 &ysig0, &ysig1); 1585 } 1586 } else { 1587 shift = clz64(ysig0); 1588 yexp = xexp - shift; 1589 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); 1590 } 1591 } else { 1592 ysign = true; 1593 sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1); 1594 if (ysig0 == 0) { 1595 shift = clz64(ysig1) + 64; 1596 } else { 1597 shift = clz64(ysig0); 1598 } 1599 yexp = xexp - shift; 1600 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); 1601 } 1602 } else { 1603 /* 1604 * t's exponent must be greater than x's because t 1605 * is positive and the nearest multiple of 1/8 to 1606 * x, and if x has a greater exponent, the power 1607 * of 2 with that exponent is also a multiple of 1608 * 1/8. 1609 */ 1610 uint64_t usig0, usig1; 1611 shift128RightJamming(xsig0, xsig1, texp - xexp, 1612 &usig0, &usig1); 1613 ysign = true; 1614 sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1); 1615 if (ysig0 == 0) { 1616 shift = clz64(ysig1) + 64; 1617 } else { 1618 shift = clz64(ysig0); 1619 } 1620 yexp = texp - shift; 1621 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); 1622 } 1623 } 1624 1625 /* 1626 * Compute z = y/(1+tx), so arctan(x) = arctan(t) + 1627 * arctan(z). 1628 */ 1629 zsign = ysign; 1630 if (texp == 0 || yexp == 0) { 1631 zexp = yexp; 1632 zsig0 = ysig0; 1633 zsig1 = ysig1; 1634 } else { 1635 /* 1636 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1. 1637 */ 1638 int32_t dexp = texp + xexp - 0x3ffe; 1639 uint64_t dsig0, dsig1, dsig2; 1640 mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2); 1641 /* 1642 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0 1643 * bit). Add 1 to produce the denominator 1+tx. 1644 */ 1645 shift128RightJamming(dsig0, dsig1, 0x3fff - dexp, 1646 &dsig0, &dsig1); 1647 dsig0 |= 0x8000000000000000ULL; 1648 zexp = yexp - 1; 1649 remsig0 = ysig0; 1650 remsig1 = ysig1; 1651 remsig2 = 0; 1652 if (dsig0 <= remsig0) { 1653 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); 1654 ++zexp; 1655 } 1656 zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0); 1657 mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2); 1658 sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2, 1659 &remsig0, &remsig1, &remsig2); 1660 while ((int64_t) remsig0 < 0) { 1661 --zsig0; 1662 add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1, 1663 &remsig0, &remsig1, &remsig2); 1664 } 1665 zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0); 1666 /* No need to correct any estimation error in zsig1. */ 1667 } 1668 1669 if (zexp == 0) { 1670 azexp = 0; 1671 azsig0 = 0; 1672 azsig1 = 0; 1673 } else { 1674 floatx80 z2, accum; 1675 uint64_t z2sig0, z2sig1, z2sig2, z2sig3; 1676 /* Compute z^2. */ 1677 mul128To256(zsig0, zsig1, zsig0, zsig1, 1678 &z2sig0, &z2sig1, &z2sig2, &z2sig3); 1679 z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false, 1680 zexp + zexp - 0x3ffe, 1681 z2sig0, z2sig1, 1682 &env->fp_status); 1683 1684 /* Compute the lower parts of the polynomial expansion. */ 1685 accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status); 1686 accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status); 1687 accum = floatx80_mul(accum, z2, &env->fp_status); 1688 accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status); 1689 accum = floatx80_mul(accum, z2, &env->fp_status); 1690 accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status); 1691 accum = floatx80_mul(accum, z2, &env->fp_status); 1692 accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status); 1693 accum = floatx80_mul(accum, z2, &env->fp_status); 1694 accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status); 1695 accum = floatx80_mul(accum, z2, &env->fp_status); 1696 1697 /* 1698 * The full polynomial expansion is z*(fpatan_coeff_0 + accum). 1699 * fpatan_coeff_0 is 1, and accum is negative and much smaller. 1700 */ 1701 aexp = extractFloatx80Exp(fpatan_coeff_0); 1702 shift128RightJamming(extractFloatx80Frac(accum), 0, 1703 aexp - extractFloatx80Exp(accum), 1704 &asig0, &asig1); 1705 sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1, 1706 &asig0, &asig1); 1707 /* Multiply by z to compute arctan(z). */ 1708 azexp = aexp + zexp - 0x3ffe; 1709 mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1, 1710 &azsig2, &azsig3); 1711 } 1712 1713 /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */ 1714 if (texp == 0) { 1715 /* z is positive. */ 1716 axexp = azexp; 1717 axsig0 = azsig0; 1718 axsig1 = azsig1; 1719 } else { 1720 bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low); 1721 int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low); 1722 uint64_t low_sig0 = 1723 extractFloatx80Frac(fpatan_table[n].atan_low); 1724 uint64_t low_sig1 = 0; 1725 axexp = extractFloatx80Exp(fpatan_table[n].atan_high); 1726 axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high); 1727 axsig1 = 0; 1728 shift128RightJamming(low_sig0, low_sig1, axexp - low_exp, 1729 &low_sig0, &low_sig1); 1730 if (low_sign) { 1731 sub128(axsig0, axsig1, low_sig0, low_sig1, 1732 &axsig0, &axsig1); 1733 } else { 1734 add128(axsig0, axsig1, low_sig0, low_sig1, 1735 &axsig0, &axsig1); 1736 } 1737 if (azexp >= axexp) { 1738 shift128RightJamming(axsig0, axsig1, azexp - axexp + 1, 1739 &axsig0, &axsig1); 1740 axexp = azexp + 1; 1741 shift128RightJamming(azsig0, azsig1, 1, 1742 &azsig0, &azsig1); 1743 } else { 1744 shift128RightJamming(axsig0, axsig1, 1, 1745 &axsig0, &axsig1); 1746 shift128RightJamming(azsig0, azsig1, axexp - azexp + 1, 1747 &azsig0, &azsig1); 1748 ++axexp; 1749 } 1750 if (zsign) { 1751 sub128(axsig0, axsig1, azsig0, azsig1, 1752 &axsig0, &axsig1); 1753 } else { 1754 add128(axsig0, axsig1, azsig0, azsig1, 1755 &axsig0, &axsig1); 1756 } 1757 } 1758 1759 if (adj_exp == 0) { 1760 rexp = axexp; 1761 rsig0 = axsig0; 1762 rsig1 = axsig1; 1763 } else { 1764 /* 1765 * Add or subtract arctan(x) (exponent axexp, 1766 * significand axsig0 and axsig1, positive, not 1767 * necessarily normalized) to the number given by 1768 * adj_exp, adj_sig0 and adj_sig1, according to 1769 * adj_sub. 1770 */ 1771 if (adj_exp >= axexp) { 1772 shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1, 1773 &axsig0, &axsig1); 1774 rexp = adj_exp + 1; 1775 shift128RightJamming(adj_sig0, adj_sig1, 1, 1776 &adj_sig0, &adj_sig1); 1777 } else { 1778 shift128RightJamming(axsig0, axsig1, 1, 1779 &axsig0, &axsig1); 1780 shift128RightJamming(adj_sig0, adj_sig1, 1781 axexp - adj_exp + 1, 1782 &adj_sig0, &adj_sig1); 1783 rexp = axexp + 1; 1784 } 1785 if (adj_sub) { 1786 sub128(adj_sig0, adj_sig1, axsig0, axsig1, 1787 &rsig0, &rsig1); 1788 } else { 1789 add128(adj_sig0, adj_sig1, axsig0, axsig1, 1790 &rsig0, &rsig1); 1791 } 1792 } 1793 1794 env->fp_status.float_rounding_mode = save_mode; 1795 env->fp_status.floatx80_rounding_precision = save_prec; 1796 } 1797 /* This result is inexact. */ 1798 rsig1 |= 1; 1799 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp, 1800 rsig0, rsig1, &env->fp_status); 1801 } 1802 1803 fpop(env); 1804 merge_exception_flags(env, old_flags); 1805 } 1806 1807 void helper_fxtract(CPUX86State *env) 1808 { 1809 uint8_t old_flags = save_exception_flags(env); 1810 CPU_LDoubleU temp; 1811 1812 temp.d = ST0; 1813 1814 if (floatx80_is_zero(ST0)) { 1815 /* Easy way to generate -inf and raising division by 0 exception */ 1816 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, 1817 &env->fp_status); 1818 fpush(env); 1819 ST0 = temp.d; 1820 } else if (floatx80_invalid_encoding(ST0)) { 1821 float_raise(float_flag_invalid, &env->fp_status); 1822 ST0 = floatx80_default_nan(&env->fp_status); 1823 fpush(env); 1824 ST0 = ST1; 1825 } else if (floatx80_is_any_nan(ST0)) { 1826 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1827 float_raise(float_flag_invalid, &env->fp_status); 1828 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1829 } 1830 fpush(env); 1831 ST0 = ST1; 1832 } else if (floatx80_is_infinity(ST0)) { 1833 fpush(env); 1834 ST0 = ST1; 1835 ST1 = floatx80_infinity; 1836 } else { 1837 int expdif; 1838 1839 if (EXPD(temp) == 0) { 1840 int shift = clz64(temp.l.lower); 1841 temp.l.lower <<= shift; 1842 expdif = 1 - EXPBIAS - shift; 1843 float_raise(float_flag_input_denormal_flushed, &env->fp_status); 1844 } else { 1845 expdif = EXPD(temp) - EXPBIAS; 1846 } 1847 /* DP exponent bias */ 1848 ST0 = int32_to_floatx80(expdif, &env->fp_status); 1849 fpush(env); 1850 BIASEXPONENT(temp); 1851 ST0 = temp.d; 1852 } 1853 merge_exception_flags(env, old_flags); 1854 } 1855 1856 static void helper_fprem_common(CPUX86State *env, bool mod) 1857 { 1858 uint8_t old_flags = save_exception_flags(env); 1859 uint64_t quotient; 1860 CPU_LDoubleU temp0, temp1; 1861 int exp0, exp1, expdiff; 1862 1863 temp0.d = ST0; 1864 temp1.d = ST1; 1865 exp0 = EXPD(temp0); 1866 exp1 = EXPD(temp1); 1867 1868 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 1869 if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 1870 exp0 == 0x7fff || exp1 == 0x7fff || 1871 floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) { 1872 ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1873 } else { 1874 if (exp0 == 0) { 1875 exp0 = 1 - clz64(temp0.l.lower); 1876 } 1877 if (exp1 == 0) { 1878 exp1 = 1 - clz64(temp1.l.lower); 1879 } 1880 expdiff = exp0 - exp1; 1881 if (expdiff < 64) { 1882 ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1883 env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */ 1884 env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */ 1885 env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */ 1886 } else { 1887 /* 1888 * Partial remainder. This choice of how many bits to 1889 * process at once is specified in AMD instruction set 1890 * manuals, and empirically is followed by Intel 1891 * processors as well; it ensures that the final remainder 1892 * operation in a loop does produce the correct low three 1893 * bits of the quotient. AMD manuals specify that the 1894 * flags other than C2 are cleared, and empirically Intel 1895 * processors clear them as well. 1896 */ 1897 int n = 32 + (expdiff % 32); 1898 temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status); 1899 ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status); 1900 env->fpus |= 0x400; /* C2 <-- 1 */ 1901 } 1902 } 1903 merge_exception_flags(env, old_flags); 1904 } 1905 1906 void helper_fprem1(CPUX86State *env) 1907 { 1908 helper_fprem_common(env, false); 1909 } 1910 1911 void helper_fprem(CPUX86State *env) 1912 { 1913 helper_fprem_common(env, true); 1914 } 1915 1916 /* 128-bit significand of log2(e). */ 1917 #define log2_e_sig_high 0xb8aa3b295c17f0bbULL 1918 #define log2_e_sig_low 0xbe87fed0691d3e89ULL 1919 1920 /* 1921 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)), 1922 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3, 1923 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the 1924 * interval [sqrt(2)/2, sqrt(2)]. 1925 */ 1926 #define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL) 1927 #define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL) 1928 #define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL) 1929 #define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL) 1930 #define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL) 1931 #define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL) 1932 #define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL) 1933 #define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL) 1934 #define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL) 1935 #define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL) 1936 #define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL) 1937 1938 /* 1939 * Compute an approximation of log2(1+arg), where 1+arg is in the 1940 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this 1941 * function is called, rounding precision is set to 80 and the 1942 * round-to-nearest mode is in effect. arg must not be exactly zero, 1943 * and must not be so close to zero that underflow might occur. 1944 */ 1945 static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, 1946 uint64_t *sig0, uint64_t *sig1) 1947 { 1948 uint64_t arg0_sig = extractFloatx80Frac(arg); 1949 int32_t arg0_exp = extractFloatx80Exp(arg); 1950 bool arg0_sign = extractFloatx80Sign(arg); 1951 bool asign; 1952 int32_t dexp, texp, aexp; 1953 uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2; 1954 uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3; 1955 uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1; 1956 floatx80 t2, accum; 1957 1958 /* 1959 * Compute an approximation of arg/(2+arg), with extra precision, 1960 * as the argument to a polynomial approximation. The extra 1961 * precision is only needed for the first term of the 1962 * approximation, with subsequent terms being significantly 1963 * smaller; the approximation only uses odd exponents, and the 1964 * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029.... 1965 */ 1966 if (arg0_sign) { 1967 dexp = 0x3fff; 1968 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 1969 sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1); 1970 } else { 1971 dexp = 0x4000; 1972 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 1973 dsig0 |= 0x8000000000000000ULL; 1974 } 1975 texp = arg0_exp - dexp + 0x3ffe; 1976 rsig0 = arg0_sig; 1977 rsig1 = 0; 1978 rsig2 = 0; 1979 if (dsig0 <= rsig0) { 1980 shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1); 1981 ++texp; 1982 } 1983 tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0); 1984 mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2); 1985 sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2, 1986 &rsig0, &rsig1, &rsig2); 1987 while ((int64_t) rsig0 < 0) { 1988 --tsig0; 1989 add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1, 1990 &rsig0, &rsig1, &rsig2); 1991 } 1992 tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0); 1993 /* 1994 * No need to correct any estimation error in tsig1; even with 1995 * such error, it is accurate enough. Now compute the square of 1996 * that approximation. 1997 */ 1998 mul128To256(tsig0, tsig1, tsig0, tsig1, 1999 &t2sig0, &t2sig1, &t2sig2, &t2sig3); 2000 t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false, 2001 texp + texp - 0x3ffe, 2002 t2sig0, t2sig1, &env->fp_status); 2003 2004 /* Compute the lower parts of the polynomial expansion. */ 2005 accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status); 2006 accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status); 2007 accum = floatx80_mul(accum, t2, &env->fp_status); 2008 accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status); 2009 accum = floatx80_mul(accum, t2, &env->fp_status); 2010 accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status); 2011 accum = floatx80_mul(accum, t2, &env->fp_status); 2012 accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status); 2013 accum = floatx80_mul(accum, t2, &env->fp_status); 2014 accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status); 2015 accum = floatx80_mul(accum, t2, &env->fp_status); 2016 accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status); 2017 accum = floatx80_mul(accum, t2, &env->fp_status); 2018 accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status); 2019 accum = floatx80_mul(accum, t2, &env->fp_status); 2020 accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status); 2021 accum = floatx80_mul(accum, t2, &env->fp_status); 2022 accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status); 2023 2024 /* 2025 * The full polynomial expansion is fyl2x_coeff_0 + accum (where 2026 * accum has much lower magnitude, and so, in particular, carry 2027 * out of the addition is not possible), multiplied by t. (This 2028 * expansion is only accurate to about 70 bits, not 128 bits.) 2029 */ 2030 aexp = extractFloatx80Exp(fyl2x_coeff_0); 2031 asign = extractFloatx80Sign(fyl2x_coeff_0); 2032 shift128RightJamming(extractFloatx80Frac(accum), 0, 2033 aexp - extractFloatx80Exp(accum), 2034 &asig0, &asig1); 2035 bsig0 = extractFloatx80Frac(fyl2x_coeff_0); 2036 bsig1 = 0; 2037 if (asign == extractFloatx80Sign(accum)) { 2038 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 2039 } else { 2040 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 2041 } 2042 /* Multiply by t to compute the required result. */ 2043 mul128To256(asig0, asig1, tsig0, tsig1, 2044 &asig0, &asig1, &asig2, &asig3); 2045 aexp += texp - 0x3ffe; 2046 *exp = aexp; 2047 *sig0 = asig0; 2048 *sig1 = asig1; 2049 } 2050 2051 void helper_fyl2xp1(CPUX86State *env) 2052 { 2053 uint8_t old_flags = save_exception_flags(env); 2054 uint64_t arg0_sig = extractFloatx80Frac(ST0); 2055 int32_t arg0_exp = extractFloatx80Exp(ST0); 2056 bool arg0_sign = extractFloatx80Sign(ST0); 2057 uint64_t arg1_sig = extractFloatx80Frac(ST1); 2058 int32_t arg1_exp = extractFloatx80Exp(ST1); 2059 bool arg1_sign = extractFloatx80Sign(ST1); 2060 2061 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2062 float_raise(float_flag_invalid, &env->fp_status); 2063 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 2064 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 2065 float_raise(float_flag_invalid, &env->fp_status); 2066 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 2067 } else if (floatx80_invalid_encoding(ST0) || 2068 floatx80_invalid_encoding(ST1)) { 2069 float_raise(float_flag_invalid, &env->fp_status); 2070 ST1 = floatx80_default_nan(&env->fp_status); 2071 } else if (floatx80_is_any_nan(ST0)) { 2072 ST1 = ST0; 2073 } else if (floatx80_is_any_nan(ST1)) { 2074 /* Pass this NaN through. */ 2075 } else if (arg0_exp > 0x3ffd || 2076 (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ? 2077 0x95f619980c4336f7ULL : 2078 0xd413cccfe7799211ULL))) { 2079 /* 2080 * Out of range for the instruction (ST0 must have absolute 2081 * value less than 1 - sqrt(2)/2 = 0.292..., according to 2082 * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1 2083 * to sqrt(2) - 1, which we allow here), treat as invalid. 2084 */ 2085 float_raise(float_flag_invalid, &env->fp_status); 2086 ST1 = floatx80_default_nan(&env->fp_status); 2087 } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 2088 arg1_exp == 0x7fff) { 2089 /* 2090 * One argument is zero, or multiplying by infinity; correct 2091 * result is exact and can be obtained by multiplying the 2092 * arguments. 2093 */ 2094 ST1 = floatx80_mul(ST0, ST1, &env->fp_status); 2095 } else if (arg0_exp < 0x3fb0) { 2096 /* 2097 * Multiplying both arguments and an extra-precision version 2098 * of log2(e) is sufficiently precise. 2099 */ 2100 uint64_t sig0, sig1, sig2; 2101 int32_t exp; 2102 if (arg0_exp == 0) { 2103 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 2104 } 2105 if (arg1_exp == 0) { 2106 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2107 } 2108 mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig, 2109 &sig0, &sig1, &sig2); 2110 exp = arg0_exp + 1; 2111 mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2); 2112 exp += arg1_exp - 0x3ffe; 2113 /* This result is inexact. */ 2114 sig1 |= 1; 2115 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2116 arg0_sign ^ arg1_sign, exp, 2117 sig0, sig1, &env->fp_status); 2118 } else { 2119 int32_t aexp; 2120 uint64_t asig0, asig1, asig2; 2121 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 2122 FloatX80RoundPrec save_prec = 2123 env->fp_status.floatx80_rounding_precision; 2124 env->fp_status.float_rounding_mode = float_round_nearest_even; 2125 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2126 2127 helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1); 2128 /* 2129 * Multiply by the second argument to compute the required 2130 * result. 2131 */ 2132 if (arg1_exp == 0) { 2133 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2134 } 2135 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2136 aexp += arg1_exp - 0x3ffe; 2137 /* This result is inexact. */ 2138 asig1 |= 1; 2139 env->fp_status.float_rounding_mode = save_mode; 2140 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2141 arg0_sign ^ arg1_sign, aexp, 2142 asig0, asig1, &env->fp_status); 2143 env->fp_status.floatx80_rounding_precision = save_prec; 2144 } 2145 fpop(env); 2146 merge_exception_flags(env, old_flags); 2147 } 2148 2149 void helper_fyl2x(CPUX86State *env) 2150 { 2151 uint8_t old_flags = save_exception_flags(env); 2152 uint64_t arg0_sig = extractFloatx80Frac(ST0); 2153 int32_t arg0_exp = extractFloatx80Exp(ST0); 2154 bool arg0_sign = extractFloatx80Sign(ST0); 2155 uint64_t arg1_sig = extractFloatx80Frac(ST1); 2156 int32_t arg1_exp = extractFloatx80Exp(ST1); 2157 bool arg1_sign = extractFloatx80Sign(ST1); 2158 2159 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2160 float_raise(float_flag_invalid, &env->fp_status); 2161 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 2162 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 2163 float_raise(float_flag_invalid, &env->fp_status); 2164 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 2165 } else if (floatx80_invalid_encoding(ST0) || 2166 floatx80_invalid_encoding(ST1)) { 2167 float_raise(float_flag_invalid, &env->fp_status); 2168 ST1 = floatx80_default_nan(&env->fp_status); 2169 } else if (floatx80_is_any_nan(ST0)) { 2170 ST1 = ST0; 2171 } else if (floatx80_is_any_nan(ST1)) { 2172 /* Pass this NaN through. */ 2173 } else if (arg0_sign && !floatx80_is_zero(ST0)) { 2174 float_raise(float_flag_invalid, &env->fp_status); 2175 ST1 = floatx80_default_nan(&env->fp_status); 2176 } else if (floatx80_is_infinity(ST1)) { 2177 FloatRelation cmp = floatx80_compare(ST0, floatx80_one, 2178 &env->fp_status); 2179 switch (cmp) { 2180 case float_relation_less: 2181 ST1 = floatx80_chs(ST1); 2182 break; 2183 case float_relation_greater: 2184 /* Result is infinity of the same sign as ST1. */ 2185 break; 2186 default: 2187 float_raise(float_flag_invalid, &env->fp_status); 2188 ST1 = floatx80_default_nan(&env->fp_status); 2189 break; 2190 } 2191 } else if (floatx80_is_infinity(ST0)) { 2192 if (floatx80_is_zero(ST1)) { 2193 float_raise(float_flag_invalid, &env->fp_status); 2194 ST1 = floatx80_default_nan(&env->fp_status); 2195 } else if (arg1_sign) { 2196 ST1 = floatx80_chs(ST0); 2197 } else { 2198 ST1 = ST0; 2199 } 2200 } else if (floatx80_is_zero(ST0)) { 2201 if (floatx80_is_zero(ST1)) { 2202 float_raise(float_flag_invalid, &env->fp_status); 2203 ST1 = floatx80_default_nan(&env->fp_status); 2204 } else { 2205 /* Result is infinity with opposite sign to ST1. */ 2206 float_raise(float_flag_divbyzero, &env->fp_status); 2207 ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff, 2208 0x8000000000000000ULL); 2209 } 2210 } else if (floatx80_is_zero(ST1)) { 2211 if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) { 2212 ST1 = floatx80_chs(ST1); 2213 } 2214 /* Otherwise, ST1 is already the correct result. */ 2215 } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) { 2216 if (arg1_sign) { 2217 ST1 = floatx80_chs(floatx80_zero); 2218 } else { 2219 ST1 = floatx80_zero; 2220 } 2221 } else { 2222 int32_t int_exp; 2223 floatx80 arg0_m1; 2224 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 2225 FloatX80RoundPrec save_prec = 2226 env->fp_status.floatx80_rounding_precision; 2227 env->fp_status.float_rounding_mode = float_round_nearest_even; 2228 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2229 2230 if (arg0_exp == 0) { 2231 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 2232 } 2233 if (arg1_exp == 0) { 2234 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2235 } 2236 int_exp = arg0_exp - 0x3fff; 2237 if (arg0_sig > 0xb504f333f9de6484ULL) { 2238 ++int_exp; 2239 } 2240 arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp, 2241 &env->fp_status), 2242 floatx80_one, &env->fp_status); 2243 if (floatx80_is_zero(arg0_m1)) { 2244 /* Exact power of 2; multiply by ST1. */ 2245 env->fp_status.float_rounding_mode = save_mode; 2246 ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status), 2247 ST1, &env->fp_status); 2248 } else { 2249 bool asign = extractFloatx80Sign(arg0_m1); 2250 int32_t aexp; 2251 uint64_t asig0, asig1, asig2; 2252 helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1); 2253 if (int_exp != 0) { 2254 bool isign = (int_exp < 0); 2255 int32_t iexp; 2256 uint64_t isig; 2257 int shift; 2258 int_exp = isign ? -int_exp : int_exp; 2259 shift = clz32(int_exp) + 32; 2260 isig = int_exp; 2261 isig <<= shift; 2262 iexp = 0x403e - shift; 2263 shift128RightJamming(asig0, asig1, iexp - aexp, 2264 &asig0, &asig1); 2265 if (asign == isign) { 2266 add128(isig, 0, asig0, asig1, &asig0, &asig1); 2267 } else { 2268 sub128(isig, 0, asig0, asig1, &asig0, &asig1); 2269 } 2270 aexp = iexp; 2271 asign = isign; 2272 } 2273 /* 2274 * Multiply by the second argument to compute the required 2275 * result. 2276 */ 2277 if (arg1_exp == 0) { 2278 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2279 } 2280 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2281 aexp += arg1_exp - 0x3ffe; 2282 /* This result is inexact. */ 2283 asig1 |= 1; 2284 env->fp_status.float_rounding_mode = save_mode; 2285 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2286 asign ^ arg1_sign, aexp, 2287 asig0, asig1, &env->fp_status); 2288 } 2289 2290 env->fp_status.floatx80_rounding_precision = save_prec; 2291 } 2292 fpop(env); 2293 merge_exception_flags(env, old_flags); 2294 } 2295 2296 void helper_fsqrt(CPUX86State *env) 2297 { 2298 uint8_t old_flags = save_exception_flags(env); 2299 if (floatx80_is_neg(ST0)) { 2300 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2301 env->fpus |= 0x400; 2302 } 2303 ST0 = floatx80_sqrt(ST0, &env->fp_status); 2304 merge_exception_flags(env, old_flags); 2305 } 2306 2307 void helper_fsincos(CPUX86State *env) 2308 { 2309 double fptemp = floatx80_to_double(env, ST0); 2310 2311 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2312 env->fpus |= 0x400; 2313 } else { 2314 ST0 = double_to_floatx80(env, sin(fptemp)); 2315 fpush(env); 2316 ST0 = double_to_floatx80(env, cos(fptemp)); 2317 env->fpus &= ~0x400; /* C2 <-- 0 */ 2318 /* the above code is for |arg| < 2**63 only */ 2319 } 2320 } 2321 2322 void helper_frndint(CPUX86State *env) 2323 { 2324 uint8_t old_flags = save_exception_flags(env); 2325 ST0 = floatx80_round_to_int(ST0, &env->fp_status); 2326 merge_exception_flags(env, old_flags); 2327 } 2328 2329 void helper_fscale(CPUX86State *env) 2330 { 2331 uint8_t old_flags = save_exception_flags(env); 2332 if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) { 2333 float_raise(float_flag_invalid, &env->fp_status); 2334 ST0 = floatx80_default_nan(&env->fp_status); 2335 } else if (floatx80_is_any_nan(ST1)) { 2336 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2337 float_raise(float_flag_invalid, &env->fp_status); 2338 } 2339 ST0 = ST1; 2340 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2341 float_raise(float_flag_invalid, &env->fp_status); 2342 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 2343 } 2344 } else if (floatx80_is_infinity(ST1) && 2345 !floatx80_invalid_encoding(ST0) && 2346 !floatx80_is_any_nan(ST0)) { 2347 if (floatx80_is_neg(ST1)) { 2348 if (floatx80_is_infinity(ST0)) { 2349 float_raise(float_flag_invalid, &env->fp_status); 2350 ST0 = floatx80_default_nan(&env->fp_status); 2351 } else { 2352 ST0 = (floatx80_is_neg(ST0) ? 2353 floatx80_chs(floatx80_zero) : 2354 floatx80_zero); 2355 } 2356 } else { 2357 if (floatx80_is_zero(ST0)) { 2358 float_raise(float_flag_invalid, &env->fp_status); 2359 ST0 = floatx80_default_nan(&env->fp_status); 2360 } else { 2361 ST0 = (floatx80_is_neg(ST0) ? 2362 floatx80_chs(floatx80_infinity) : 2363 floatx80_infinity); 2364 } 2365 } 2366 } else { 2367 int n; 2368 FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision; 2369 uint8_t save_flags = get_float_exception_flags(&env->fp_status); 2370 set_float_exception_flags(0, &env->fp_status); 2371 n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); 2372 set_float_exception_flags(save_flags, &env->fp_status); 2373 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2374 ST0 = floatx80_scalbn(ST0, n, &env->fp_status); 2375 env->fp_status.floatx80_rounding_precision = save; 2376 } 2377 merge_exception_flags(env, old_flags); 2378 } 2379 2380 void helper_fsin(CPUX86State *env) 2381 { 2382 double fptemp = floatx80_to_double(env, ST0); 2383 2384 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2385 env->fpus |= 0x400; 2386 } else { 2387 ST0 = double_to_floatx80(env, sin(fptemp)); 2388 env->fpus &= ~0x400; /* C2 <-- 0 */ 2389 /* the above code is for |arg| < 2**53 only */ 2390 } 2391 } 2392 2393 void helper_fcos(CPUX86State *env) 2394 { 2395 double fptemp = floatx80_to_double(env, ST0); 2396 2397 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2398 env->fpus |= 0x400; 2399 } else { 2400 ST0 = double_to_floatx80(env, cos(fptemp)); 2401 env->fpus &= ~0x400; /* C2 <-- 0 */ 2402 /* the above code is for |arg| < 2**63 only */ 2403 } 2404 } 2405 2406 void helper_fxam_ST0(CPUX86State *env) 2407 { 2408 CPU_LDoubleU temp; 2409 int expdif; 2410 2411 temp.d = ST0; 2412 2413 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2414 if (SIGND(temp)) { 2415 env->fpus |= 0x200; /* C1 <-- 1 */ 2416 } 2417 2418 if (env->fptags[env->fpstt]) { 2419 env->fpus |= 0x4100; /* Empty */ 2420 return; 2421 } 2422 2423 expdif = EXPD(temp); 2424 if (expdif == MAXEXPD) { 2425 if (MANTD(temp) == 0x8000000000000000ULL) { 2426 env->fpus |= 0x500; /* Infinity */ 2427 } else if (MANTD(temp) & 0x8000000000000000ULL) { 2428 env->fpus |= 0x100; /* NaN */ 2429 } 2430 } else if (expdif == 0) { 2431 if (MANTD(temp) == 0) { 2432 env->fpus |= 0x4000; /* Zero */ 2433 } else { 2434 env->fpus |= 0x4400; /* Denormal */ 2435 } 2436 } else if (MANTD(temp) & 0x8000000000000000ULL) { 2437 env->fpus |= 0x400; 2438 } 2439 } 2440 2441 static void do_fstenv(X86Access *ac, target_ulong ptr, int data32) 2442 { 2443 CPUX86State *env = ac->env; 2444 int fpus, fptag, exp, i; 2445 uint64_t mant; 2446 CPU_LDoubleU tmp; 2447 2448 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2449 fptag = 0; 2450 for (i = 7; i >= 0; i--) { 2451 fptag <<= 2; 2452 if (env->fptags[i]) { 2453 fptag |= 3; 2454 } else { 2455 tmp.d = env->fpregs[i].d; 2456 exp = EXPD(tmp); 2457 mant = MANTD(tmp); 2458 if (exp == 0 && mant == 0) { 2459 /* zero */ 2460 fptag |= 1; 2461 } else if (exp == 0 || exp == MAXEXPD 2462 || (mant & (1LL << 63)) == 0) { 2463 /* NaNs, infinity, denormal */ 2464 fptag |= 2; 2465 } 2466 } 2467 } 2468 if (data32) { 2469 /* 32 bit */ 2470 access_stl(ac, ptr, env->fpuc); 2471 access_stl(ac, ptr + 4, fpus); 2472 access_stl(ac, ptr + 8, fptag); 2473 access_stl(ac, ptr + 12, env->fpip); /* fpip */ 2474 access_stl(ac, ptr + 16, env->fpcs); /* fpcs */ 2475 access_stl(ac, ptr + 20, env->fpdp); /* fpoo */ 2476 access_stl(ac, ptr + 24, env->fpds); /* fpos */ 2477 } else { 2478 /* 16 bit */ 2479 access_stw(ac, ptr, env->fpuc); 2480 access_stw(ac, ptr + 2, fpus); 2481 access_stw(ac, ptr + 4, fptag); 2482 access_stw(ac, ptr + 6, env->fpip); 2483 access_stw(ac, ptr + 8, env->fpcs); 2484 access_stw(ac, ptr + 10, env->fpdp); 2485 access_stw(ac, ptr + 12, env->fpds); 2486 } 2487 } 2488 2489 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) 2490 { 2491 X86Access ac; 2492 2493 access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC()); 2494 do_fstenv(&ac, ptr, data32); 2495 } 2496 2497 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) 2498 { 2499 env->fpstt = (fpus >> 11) & 7; 2500 env->fpus = fpus & ~0x3800 & ~FPUS_B; 2501 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0; 2502 #if !defined(CONFIG_USER_ONLY) 2503 if (!(env->fpus & FPUS_SE)) { 2504 /* 2505 * Here the processor deasserts FERR#; in response, the chipset deasserts 2506 * IGNNE#. 2507 */ 2508 cpu_clear_ignne(); 2509 } 2510 #endif 2511 } 2512 2513 static void do_fldenv(X86Access *ac, target_ulong ptr, int data32) 2514 { 2515 int i, fpus, fptag; 2516 CPUX86State *env = ac->env; 2517 2518 cpu_set_fpuc(env, access_ldw(ac, ptr)); 2519 fpus = access_ldw(ac, ptr + (2 << data32)); 2520 fptag = access_ldw(ac, ptr + (4 << data32)); 2521 2522 cpu_set_fpus(env, fpus); 2523 for (i = 0; i < 8; i++) { 2524 env->fptags[i] = ((fptag & 3) == 3); 2525 fptag >>= 2; 2526 } 2527 } 2528 2529 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) 2530 { 2531 X86Access ac; 2532 2533 access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC()); 2534 do_fldenv(&ac, ptr, data32); 2535 } 2536 2537 static void do_fsave(X86Access *ac, target_ulong ptr, int data32) 2538 { 2539 CPUX86State *env = ac->env; 2540 2541 do_fstenv(ac, ptr, data32); 2542 ptr += 14 << data32; 2543 2544 for (int i = 0; i < 8; i++) { 2545 floatx80 tmp = ST(i); 2546 do_fstt(ac, ptr, tmp); 2547 ptr += 10; 2548 } 2549 2550 do_fninit(env); 2551 } 2552 2553 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) 2554 { 2555 int size = (14 << data32) + 80; 2556 X86Access ac; 2557 2558 access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, GETPC()); 2559 do_fsave(&ac, ptr, data32); 2560 } 2561 2562 static void do_frstor(X86Access *ac, target_ulong ptr, int data32) 2563 { 2564 CPUX86State *env = ac->env; 2565 2566 do_fldenv(ac, ptr, data32); 2567 ptr += 14 << data32; 2568 2569 for (int i = 0; i < 8; i++) { 2570 floatx80 tmp = do_fldt(ac, ptr); 2571 ST(i) = tmp; 2572 ptr += 10; 2573 } 2574 } 2575 2576 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) 2577 { 2578 int size = (14 << data32) + 80; 2579 X86Access ac; 2580 2581 access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, GETPC()); 2582 do_frstor(&ac, ptr, data32); 2583 } 2584 2585 #define XO(X) offsetof(X86XSaveArea, X) 2586 2587 static void do_xsave_fpu(X86Access *ac, target_ulong ptr) 2588 { 2589 CPUX86State *env = ac->env; 2590 int fpus, fptag, i; 2591 target_ulong addr; 2592 2593 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2594 fptag = 0; 2595 for (i = 0; i < 8; i++) { 2596 fptag |= (env->fptags[i] << i); 2597 } 2598 2599 access_stw(ac, ptr + XO(legacy.fcw), env->fpuc); 2600 access_stw(ac, ptr + XO(legacy.fsw), fpus); 2601 access_stw(ac, ptr + XO(legacy.ftw), fptag ^ 0xff); 2602 2603 /* In 32-bit mode this is eip, sel, dp, sel. 2604 In 64-bit mode this is rip, rdp. 2605 But in either case we don't write actual data, just zeros. */ 2606 access_stq(ac, ptr + XO(legacy.fpip), 0); /* eip+sel; rip */ 2607 access_stq(ac, ptr + XO(legacy.fpdp), 0); /* edp+sel; rdp */ 2608 2609 addr = ptr + XO(legacy.fpregs); 2610 2611 for (i = 0; i < 8; i++) { 2612 floatx80 tmp = ST(i); 2613 do_fstt(ac, addr, tmp); 2614 addr += 16; 2615 } 2616 } 2617 2618 static void do_xsave_mxcsr(X86Access *ac, target_ulong ptr) 2619 { 2620 CPUX86State *env = ac->env; 2621 2622 update_mxcsr_from_sse_status(env); 2623 access_stl(ac, ptr + XO(legacy.mxcsr), env->mxcsr); 2624 access_stl(ac, ptr + XO(legacy.mxcsr_mask), 0x0000ffff); 2625 } 2626 2627 static void do_xsave_sse(X86Access *ac, target_ulong ptr) 2628 { 2629 CPUX86State *env = ac->env; 2630 int i, nb_xmm_regs; 2631 target_ulong addr; 2632 2633 if (env->hflags & HF_CS64_MASK) { 2634 nb_xmm_regs = 16; 2635 } else { 2636 nb_xmm_regs = 8; 2637 } 2638 2639 addr = ptr + XO(legacy.xmm_regs); 2640 for (i = 0; i < nb_xmm_regs; i++) { 2641 access_stq(ac, addr, env->xmm_regs[i].ZMM_Q(0)); 2642 access_stq(ac, addr + 8, env->xmm_regs[i].ZMM_Q(1)); 2643 addr += 16; 2644 } 2645 } 2646 2647 static void do_xsave_ymmh(X86Access *ac, target_ulong ptr) 2648 { 2649 CPUX86State *env = ac->env; 2650 int i, nb_xmm_regs; 2651 2652 if (env->hflags & HF_CS64_MASK) { 2653 nb_xmm_regs = 16; 2654 } else { 2655 nb_xmm_regs = 8; 2656 } 2657 2658 for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { 2659 access_stq(ac, ptr, env->xmm_regs[i].ZMM_Q(2)); 2660 access_stq(ac, ptr + 8, env->xmm_regs[i].ZMM_Q(3)); 2661 } 2662 } 2663 2664 static void do_xsave_bndregs(X86Access *ac, target_ulong ptr) 2665 { 2666 CPUX86State *env = ac->env; 2667 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2668 int i; 2669 2670 for (i = 0; i < 4; i++, addr += 16) { 2671 access_stq(ac, addr, env->bnd_regs[i].lb); 2672 access_stq(ac, addr + 8, env->bnd_regs[i].ub); 2673 } 2674 } 2675 2676 static void do_xsave_bndcsr(X86Access *ac, target_ulong ptr) 2677 { 2678 CPUX86State *env = ac->env; 2679 2680 access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), 2681 env->bndcs_regs.cfgu); 2682 access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), 2683 env->bndcs_regs.sts); 2684 } 2685 2686 static void do_xsave_pkru(X86Access *ac, target_ulong ptr) 2687 { 2688 access_stq(ac, ptr, ac->env->pkru); 2689 } 2690 2691 static void do_fxsave(X86Access *ac, target_ulong ptr) 2692 { 2693 CPUX86State *env = ac->env; 2694 2695 do_xsave_fpu(ac, ptr); 2696 if (env->cr[4] & CR4_OSFXSR_MASK) { 2697 do_xsave_mxcsr(ac, ptr); 2698 /* Fast FXSAVE leaves out the XMM registers */ 2699 if (!(env->efer & MSR_EFER_FFXSR) 2700 || (env->hflags & HF_CPL_MASK) 2701 || !(env->hflags & HF_LMA_MASK)) { 2702 do_xsave_sse(ac, ptr); 2703 } 2704 } 2705 } 2706 2707 void helper_fxsave(CPUX86State *env, target_ulong ptr) 2708 { 2709 uintptr_t ra = GETPC(); 2710 X86Access ac; 2711 2712 /* The operand must be 16 byte aligned */ 2713 if (ptr & 0xf) { 2714 raise_exception_ra(env, EXCP0D_GPF, ra); 2715 } 2716 2717 access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), 2718 MMU_DATA_STORE, ra); 2719 do_fxsave(&ac, ptr); 2720 } 2721 2722 static uint64_t get_xinuse(CPUX86State *env) 2723 { 2724 uint64_t inuse = -1; 2725 2726 /* For the most part, we don't track XINUSE. We could calculate it 2727 here for all components, but it's probably less work to simply 2728 indicate in use. That said, the state of BNDREGS is important 2729 enough to track in HFLAGS, so we might as well use that here. */ 2730 if ((env->hflags & HF_MPX_IU_MASK) == 0) { 2731 inuse &= ~XSTATE_BNDREGS_MASK; 2732 } 2733 return inuse; 2734 } 2735 2736 static void do_xsave_access(X86Access *ac, target_ulong ptr, uint64_t rfbm, 2737 uint64_t inuse, uint64_t opt) 2738 { 2739 uint64_t old_bv, new_bv; 2740 2741 if (opt & XSTATE_FP_MASK) { 2742 do_xsave_fpu(ac, ptr); 2743 } 2744 if (rfbm & XSTATE_SSE_MASK) { 2745 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */ 2746 do_xsave_mxcsr(ac, ptr); 2747 } 2748 if (opt & XSTATE_SSE_MASK) { 2749 do_xsave_sse(ac, ptr); 2750 } 2751 if (opt & XSTATE_YMM_MASK) { 2752 do_xsave_ymmh(ac, ptr + XO(avx_state)); 2753 } 2754 if (opt & XSTATE_BNDREGS_MASK) { 2755 do_xsave_bndregs(ac, ptr + XO(bndreg_state)); 2756 } 2757 if (opt & XSTATE_BNDCSR_MASK) { 2758 do_xsave_bndcsr(ac, ptr + XO(bndcsr_state)); 2759 } 2760 if (opt & XSTATE_PKRU_MASK) { 2761 do_xsave_pkru(ac, ptr + XO(pkru_state)); 2762 } 2763 2764 /* Update the XSTATE_BV field. */ 2765 old_bv = access_ldq(ac, ptr + XO(header.xstate_bv)); 2766 new_bv = (old_bv & ~rfbm) | (inuse & rfbm); 2767 access_stq(ac, ptr + XO(header.xstate_bv), new_bv); 2768 } 2769 2770 static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2771 { 2772 /* The OS must have enabled XSAVE. */ 2773 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2774 raise_exception_ra(env, EXCP06_ILLOP, ra); 2775 } 2776 2777 /* The operand must be 64 byte aligned. */ 2778 if (ptr & 63) { 2779 raise_exception_ra(env, EXCP0D_GPF, ra); 2780 } 2781 } 2782 2783 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, 2784 uint64_t inuse, uint64_t opt, uintptr_t ra) 2785 { 2786 X86Access ac; 2787 unsigned size; 2788 2789 do_xsave_chk(env, ptr, ra); 2790 2791 /* Never save anything not enabled by XCR0. */ 2792 rfbm &= env->xcr0; 2793 opt &= rfbm; 2794 size = xsave_area_size(opt, false); 2795 2796 access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra); 2797 do_xsave_access(&ac, ptr, rfbm, inuse, opt); 2798 } 2799 2800 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2801 { 2802 do_xsave(env, ptr, rfbm, get_xinuse(env), rfbm, GETPC()); 2803 } 2804 2805 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2806 { 2807 uint64_t inuse = get_xinuse(env); 2808 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); 2809 } 2810 2811 static void do_xrstor_fpu(X86Access *ac, target_ulong ptr) 2812 { 2813 CPUX86State *env = ac->env; 2814 int i, fpuc, fpus, fptag; 2815 target_ulong addr; 2816 2817 fpuc = access_ldw(ac, ptr + XO(legacy.fcw)); 2818 fpus = access_ldw(ac, ptr + XO(legacy.fsw)); 2819 fptag = access_ldw(ac, ptr + XO(legacy.ftw)); 2820 cpu_set_fpuc(env, fpuc); 2821 cpu_set_fpus(env, fpus); 2822 2823 fptag ^= 0xff; 2824 for (i = 0; i < 8; i++) { 2825 env->fptags[i] = ((fptag >> i) & 1); 2826 } 2827 2828 addr = ptr + XO(legacy.fpregs); 2829 2830 for (i = 0; i < 8; i++) { 2831 floatx80 tmp = do_fldt(ac, addr); 2832 ST(i) = tmp; 2833 addr += 16; 2834 } 2835 } 2836 2837 static void do_xrstor_mxcsr(X86Access *ac, target_ulong ptr) 2838 { 2839 CPUX86State *env = ac->env; 2840 cpu_set_mxcsr(env, access_ldl(ac, ptr + XO(legacy.mxcsr))); 2841 } 2842 2843 static void do_xrstor_sse(X86Access *ac, target_ulong ptr) 2844 { 2845 CPUX86State *env = ac->env; 2846 int i, nb_xmm_regs; 2847 target_ulong addr; 2848 2849 if (env->hflags & HF_CS64_MASK) { 2850 nb_xmm_regs = 16; 2851 } else { 2852 nb_xmm_regs = 8; 2853 } 2854 2855 addr = ptr + XO(legacy.xmm_regs); 2856 for (i = 0; i < nb_xmm_regs; i++) { 2857 env->xmm_regs[i].ZMM_Q(0) = access_ldq(ac, addr); 2858 env->xmm_regs[i].ZMM_Q(1) = access_ldq(ac, addr + 8); 2859 addr += 16; 2860 } 2861 } 2862 2863 static void do_clear_sse(CPUX86State *env) 2864 { 2865 int i, nb_xmm_regs; 2866 2867 if (env->hflags & HF_CS64_MASK) { 2868 nb_xmm_regs = 16; 2869 } else { 2870 nb_xmm_regs = 8; 2871 } 2872 2873 for (i = 0; i < nb_xmm_regs; i++) { 2874 env->xmm_regs[i].ZMM_Q(0) = 0; 2875 env->xmm_regs[i].ZMM_Q(1) = 0; 2876 } 2877 } 2878 2879 static void do_xrstor_ymmh(X86Access *ac, target_ulong ptr) 2880 { 2881 CPUX86State *env = ac->env; 2882 int i, nb_xmm_regs; 2883 2884 if (env->hflags & HF_CS64_MASK) { 2885 nb_xmm_regs = 16; 2886 } else { 2887 nb_xmm_regs = 8; 2888 } 2889 2890 for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { 2891 env->xmm_regs[i].ZMM_Q(2) = access_ldq(ac, ptr); 2892 env->xmm_regs[i].ZMM_Q(3) = access_ldq(ac, ptr + 8); 2893 } 2894 } 2895 2896 static void do_clear_ymmh(CPUX86State *env) 2897 { 2898 int i, nb_xmm_regs; 2899 2900 if (env->hflags & HF_CS64_MASK) { 2901 nb_xmm_regs = 16; 2902 } else { 2903 nb_xmm_regs = 8; 2904 } 2905 2906 for (i = 0; i < nb_xmm_regs; i++) { 2907 env->xmm_regs[i].ZMM_Q(2) = 0; 2908 env->xmm_regs[i].ZMM_Q(3) = 0; 2909 } 2910 } 2911 2912 static void do_xrstor_bndregs(X86Access *ac, target_ulong ptr) 2913 { 2914 CPUX86State *env = ac->env; 2915 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2916 int i; 2917 2918 for (i = 0; i < 4; i++, addr += 16) { 2919 env->bnd_regs[i].lb = access_ldq(ac, addr); 2920 env->bnd_regs[i].ub = access_ldq(ac, addr + 8); 2921 } 2922 } 2923 2924 static void do_xrstor_bndcsr(X86Access *ac, target_ulong ptr) 2925 { 2926 CPUX86State *env = ac->env; 2927 2928 /* FIXME: Extend highest implemented bit of linear address. */ 2929 env->bndcs_regs.cfgu 2930 = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu)); 2931 env->bndcs_regs.sts 2932 = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts)); 2933 } 2934 2935 static void do_xrstor_pkru(X86Access *ac, target_ulong ptr) 2936 { 2937 ac->env->pkru = access_ldq(ac, ptr); 2938 } 2939 2940 static void do_fxrstor(X86Access *ac, target_ulong ptr) 2941 { 2942 CPUX86State *env = ac->env; 2943 2944 do_xrstor_fpu(ac, ptr); 2945 if (env->cr[4] & CR4_OSFXSR_MASK) { 2946 do_xrstor_mxcsr(ac, ptr); 2947 /* Fast FXRSTOR leaves out the XMM registers */ 2948 if (!(env->efer & MSR_EFER_FFXSR) 2949 || (env->hflags & HF_CPL_MASK) 2950 || !(env->hflags & HF_LMA_MASK)) { 2951 do_xrstor_sse(ac, ptr); 2952 } 2953 } 2954 } 2955 2956 void helper_fxrstor(CPUX86State *env, target_ulong ptr) 2957 { 2958 uintptr_t ra = GETPC(); 2959 X86Access ac; 2960 2961 /* The operand must be 16 byte aligned */ 2962 if (ptr & 0xf) { 2963 raise_exception_ra(env, EXCP0D_GPF, ra); 2964 } 2965 2966 access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), 2967 MMU_DATA_LOAD, ra); 2968 do_fxrstor(&ac, ptr); 2969 } 2970 2971 static bool valid_xrstor_header(X86Access *ac, uint64_t *pxsbv, 2972 target_ulong ptr) 2973 { 2974 uint64_t xstate_bv, xcomp_bv, reserve0; 2975 2976 xstate_bv = access_ldq(ac, ptr + XO(header.xstate_bv)); 2977 xcomp_bv = access_ldq(ac, ptr + XO(header.xcomp_bv)); 2978 reserve0 = access_ldq(ac, ptr + XO(header.reserve0)); 2979 *pxsbv = xstate_bv; 2980 2981 /* 2982 * XCOMP_BV bit 63 indicates compact form, which we do not support, 2983 * and thus must raise #GP. That leaves us in standard form. 2984 * In standard form, bytes 23:8 must be zero -- which is both 2985 * XCOMP_BV and the following 64-bit field. 2986 */ 2987 if (xcomp_bv || reserve0) { 2988 return false; 2989 } 2990 2991 /* The XSTATE_BV field must not set bits not present in XCR0. */ 2992 return (xstate_bv & ~ac->env->xcr0) == 0; 2993 } 2994 2995 static void do_xrstor(X86Access *ac, target_ulong ptr, 2996 uint64_t rfbm, uint64_t xstate_bv) 2997 { 2998 CPUX86State *env = ac->env; 2999 3000 if (rfbm & XSTATE_FP_MASK) { 3001 if (xstate_bv & XSTATE_FP_MASK) { 3002 do_xrstor_fpu(ac, ptr); 3003 } else { 3004 do_fninit(env); 3005 memset(env->fpregs, 0, sizeof(env->fpregs)); 3006 } 3007 } 3008 if (rfbm & XSTATE_SSE_MASK) { 3009 /* Note that the standard form of XRSTOR loads MXCSR from memory 3010 whether or not the XSTATE_BV bit is set. */ 3011 do_xrstor_mxcsr(ac, ptr); 3012 if (xstate_bv & XSTATE_SSE_MASK) { 3013 do_xrstor_sse(ac, ptr); 3014 } else { 3015 do_clear_sse(env); 3016 } 3017 } 3018 if (rfbm & XSTATE_YMM_MASK) { 3019 if (xstate_bv & XSTATE_YMM_MASK) { 3020 do_xrstor_ymmh(ac, ptr + XO(avx_state)); 3021 } else { 3022 do_clear_ymmh(env); 3023 } 3024 } 3025 if (rfbm & XSTATE_BNDREGS_MASK) { 3026 if (xstate_bv & XSTATE_BNDREGS_MASK) { 3027 do_xrstor_bndregs(ac, ptr + XO(bndreg_state)); 3028 env->hflags |= HF_MPX_IU_MASK; 3029 } else { 3030 memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); 3031 env->hflags &= ~HF_MPX_IU_MASK; 3032 } 3033 } 3034 if (rfbm & XSTATE_BNDCSR_MASK) { 3035 if (xstate_bv & XSTATE_BNDCSR_MASK) { 3036 do_xrstor_bndcsr(ac, ptr + XO(bndcsr_state)); 3037 } else { 3038 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); 3039 } 3040 cpu_sync_bndcs_hflags(env); 3041 } 3042 if (rfbm & XSTATE_PKRU_MASK) { 3043 uint64_t old_pkru = env->pkru; 3044 if (xstate_bv & XSTATE_PKRU_MASK) { 3045 do_xrstor_pkru(ac, ptr + XO(pkru_state)); 3046 } else { 3047 env->pkru = 0; 3048 } 3049 if (env->pkru != old_pkru) { 3050 CPUState *cs = env_cpu(env); 3051 tlb_flush(cs); 3052 } 3053 } 3054 } 3055 3056 #undef XO 3057 3058 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 3059 { 3060 uintptr_t ra = GETPC(); 3061 X86Access ac; 3062 uint64_t xstate_bv; 3063 unsigned size, size_ext; 3064 3065 do_xsave_chk(env, ptr, ra); 3066 3067 /* Begin with just the minimum size to validate the header. */ 3068 size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader); 3069 access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra); 3070 if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) { 3071 raise_exception_ra(env, EXCP0D_GPF, ra); 3072 } 3073 3074 rfbm &= env->xcr0; 3075 size_ext = xsave_area_size(rfbm & xstate_bv, false); 3076 if (size < size_ext) { 3077 /* TODO: See if existing page probe has covered extra size. */ 3078 access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra); 3079 } 3080 3081 do_xrstor(&ac, ptr, rfbm, xstate_bv); 3082 } 3083 3084 #if defined(CONFIG_USER_ONLY) 3085 void cpu_x86_fsave(CPUX86State *env, void *host, size_t len) 3086 { 3087 X86Access ac = { 3088 .haddr1 = host, 3089 .size = 4 * 7 + 8 * 10, 3090 .env = env, 3091 }; 3092 3093 assert(ac.size <= len); 3094 do_fsave(&ac, 0, true); 3095 } 3096 3097 void cpu_x86_frstor(CPUX86State *env, void *host, size_t len) 3098 { 3099 X86Access ac = { 3100 .haddr1 = host, 3101 .size = 4 * 7 + 8 * 10, 3102 .env = env, 3103 }; 3104 3105 assert(ac.size <= len); 3106 do_frstor(&ac, 0, true); 3107 } 3108 3109 void cpu_x86_fxsave(CPUX86State *env, void *host, size_t len) 3110 { 3111 X86Access ac = { 3112 .haddr1 = host, 3113 .size = sizeof(X86LegacyXSaveArea), 3114 .env = env, 3115 }; 3116 3117 assert(ac.size <= len); 3118 do_fxsave(&ac, 0); 3119 } 3120 3121 void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len) 3122 { 3123 X86Access ac = { 3124 .haddr1 = host, 3125 .size = sizeof(X86LegacyXSaveArea), 3126 .env = env, 3127 }; 3128 3129 assert(ac.size <= len); 3130 do_fxrstor(&ac, 0); 3131 } 3132 3133 void cpu_x86_xsave(CPUX86State *env, void *host, size_t len, uint64_t rfbm) 3134 { 3135 X86Access ac = { 3136 .haddr1 = host, 3137 .env = env, 3138 }; 3139 3140 /* 3141 * Since this is only called from user-level signal handling, 3142 * we should have done the job correctly there. 3143 */ 3144 assert((rfbm & ~env->xcr0) == 0); 3145 ac.size = xsave_area_size(rfbm, false); 3146 assert(ac.size <= len); 3147 do_xsave_access(&ac, 0, rfbm, get_xinuse(env), rfbm); 3148 } 3149 3150 bool cpu_x86_xrstor(CPUX86State *env, void *host, size_t len, uint64_t rfbm) 3151 { 3152 X86Access ac = { 3153 .haddr1 = host, 3154 .env = env, 3155 }; 3156 uint64_t xstate_bv; 3157 3158 /* 3159 * Since this is only called from user-level signal handling, 3160 * we should have done the job correctly there. 3161 */ 3162 assert((rfbm & ~env->xcr0) == 0); 3163 ac.size = xsave_area_size(rfbm, false); 3164 assert(ac.size <= len); 3165 3166 if (!valid_xrstor_header(&ac, &xstate_bv, 0)) { 3167 return false; 3168 } 3169 do_xrstor(&ac, 0, rfbm, xstate_bv); 3170 return true; 3171 } 3172 #endif 3173 3174 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) 3175 { 3176 /* The OS must have enabled XSAVE. */ 3177 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 3178 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 3179 } 3180 3181 switch (ecx) { 3182 case 0: 3183 return env->xcr0; 3184 case 1: 3185 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) { 3186 return env->xcr0 & get_xinuse(env); 3187 } 3188 break; 3189 } 3190 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 3191 } 3192 3193 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) 3194 { 3195 uint32_t dummy, ena_lo, ena_hi; 3196 uint64_t ena; 3197 3198 /* The OS must have enabled XSAVE. */ 3199 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 3200 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 3201 } 3202 3203 /* Only XCR0 is defined at present; the FPU may not be disabled. */ 3204 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) { 3205 goto do_gpf; 3206 } 3207 3208 /* SSE can be disabled, but only if AVX is disabled too. */ 3209 if ((mask & (XSTATE_SSE_MASK | XSTATE_YMM_MASK)) == XSTATE_YMM_MASK) { 3210 goto do_gpf; 3211 } 3212 3213 /* Disallow enabling unimplemented features. */ 3214 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); 3215 ena = ((uint64_t)ena_hi << 32) | ena_lo; 3216 if (mask & ~ena) { 3217 goto do_gpf; 3218 } 3219 3220 /* Disallow enabling only half of MPX. */ 3221 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK))) 3222 & XSTATE_BNDCSR_MASK) { 3223 goto do_gpf; 3224 } 3225 3226 env->xcr0 = mask; 3227 cpu_sync_bndcs_hflags(env); 3228 cpu_sync_avx_hflag(env); 3229 return; 3230 3231 do_gpf: 3232 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 3233 } 3234 3235 /* MMX/SSE */ 3236 /* XXX: optimize by storing fptt and fptags in the static cpu state */ 3237 3238 #define SSE_DAZ 0x0040 3239 #define SSE_RC_SHIFT 13 3240 #define SSE_RC_MASK (3 << SSE_RC_SHIFT) 3241 #define SSE_FZ 0x8000 3242 3243 void update_mxcsr_status(CPUX86State *env) 3244 { 3245 uint32_t mxcsr = env->mxcsr; 3246 int rnd_type; 3247 3248 /* set rounding mode */ 3249 rnd_type = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT; 3250 set_x86_rounding_mode(rnd_type, &env->sse_status); 3251 3252 /* Set exception flags. */ 3253 set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | 3254 (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | 3255 (mxcsr & FPUS_OE ? float_flag_overflow : 0) | 3256 (mxcsr & FPUS_UE ? float_flag_underflow : 0) | 3257 (mxcsr & FPUS_PE ? float_flag_inexact : 0), 3258 &env->sse_status); 3259 3260 /* set denormals are zero */ 3261 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); 3262 3263 /* set flush to zero */ 3264 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status); 3265 } 3266 3267 void update_mxcsr_from_sse_status(CPUX86State *env) 3268 { 3269 uint8_t flags = get_float_exception_flags(&env->sse_status); 3270 /* 3271 * The MXCSR denormal flag has opposite semantics to 3272 * float_flag_input_denormal_flushed (the softfloat code sets that flag 3273 * only when flushing input denormals to zero, but SSE sets it 3274 * only when not flushing them to zero), so is not converted 3275 * here. 3276 */ 3277 env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | 3278 (flags & float_flag_divbyzero ? FPUS_ZE : 0) | 3279 (flags & float_flag_overflow ? FPUS_OE : 0) | 3280 (flags & float_flag_underflow ? FPUS_UE : 0) | 3281 (flags & float_flag_inexact ? FPUS_PE : 0) | 3282 (flags & float_flag_output_denormal_flushed ? FPUS_UE | FPUS_PE : 3283 0)); 3284 } 3285 3286 void helper_update_mxcsr(CPUX86State *env) 3287 { 3288 update_mxcsr_from_sse_status(env); 3289 } 3290 3291 void helper_ldmxcsr(CPUX86State *env, uint32_t val) 3292 { 3293 cpu_set_mxcsr(env, val); 3294 } 3295 3296 void helper_enter_mmx(CPUX86State *env) 3297 { 3298 env->fpstt = 0; 3299 *(uint32_t *)(env->fptags) = 0; 3300 *(uint32_t *)(env->fptags + 4) = 0; 3301 } 3302 3303 void helper_emms(CPUX86State *env) 3304 { 3305 /* set to empty state */ 3306 *(uint32_t *)(env->fptags) = 0x01010101; 3307 *(uint32_t *)(env->fptags + 4) = 0x01010101; 3308 } 3309 3310 #define SHIFT 0 3311 #include "ops_sse.h" 3312 3313 #define SHIFT 1 3314 #include "ops_sse.h" 3315 3316 #define SHIFT 2 3317 #include "ops_sse.h" 3318