/*
 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include <math.h>
#include "cpu.h"
#include "tcg-cpu.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "fpu/softfloat-macros.h"
#include "helper-tcg.h"
#include "access.h"

/* float macros */
/* FT0 is a scratch operand register; ST(n) indexes the circular x87
 * register stack relative to the current top-of-stack pointer fpstt. */
#define FT0 (env->ft0)
#define ST0 (env->fpregs[env->fpstt].d)
#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d)
#define ST1 ST(1)

/* Rounding-control field (RC, bits 11:10) of the FPU control word */
#define FPU_RC_SHIFT 10
#define FPU_RC_MASK (3 << FPU_RC_SHIFT)
#define FPU_RC_NEAR 0x000
#define FPU_RC_DOWN 0x400
#define FPU_RC_UP 0x800
#define FPU_RC_CHOP 0xc00

#define MAXTAN 9223372036854775808.0

/* the following deal with x86 long double-precision numbers */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp) (fp.l.upper & 0x7fff)
#define SIGND(fp) ((fp.l.upper) & 0x8000)
#define MANTD(fp) (fp.l.lower)
#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS

/* FPU status word exception / status bits */
#define FPUS_IE (1 << 0)  /* invalid operation */
#define FPUS_DE (1 << 1)  /* denormalized operand */
#define FPUS_ZE (1 << 2)  /* zero divide */
#define FPUS_OE (1 << 3)  /* overflow */
#define FPUS_UE (1 << 4)  /* underflow */
#define FPUS_PE (1 << 5)  /* precision (inexact) */
#define FPUS_SF (1 << 6)  /* stack fault */
#define FPUS_SE (1 << 7)  /* error summary */
#define FPUS_B (1 << 15)  /* busy */

/* Exception-mask bits (low six bits) of the FPU control word */
#define FPUC_EM 0x3f

/*
 * Extended-precision constants for the FLDxx instructions; the _d/_u
 * variants are the values rounded down/up for the directed rounding
 * modes (selected by the helpers below based on FPUC RC bits).
 */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)

/* Push: decrement TOP (mod 8) and mark the new top register valid. */
static inline void fpush(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fptags[env->fpstt] = 0; /* validate stack entry */
}

/* Pop: invalidate the current top register and increment TOP (mod 8). */
static inline void fpop(CPUX86State *env)
{
    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
    env->fpstt = (env->fpstt + 1) & 7;
}

/*
 * Load an 80-bit extended value from guest memory: 64-bit significand
 * at ptr, 16-bit sign+exponent at ptr + 8.
 */
static floatx80 do_fldt(X86Access *ac, target_ulong ptr)
{
    CPU_LDoubleU temp;

    temp.l.lower = access_ldq(ac, ptr);
    temp.l.upper = access_ldw(ac, ptr + 8);
    return temp.d;
}

/* Store an 80-bit extended value to guest memory (layout as do_fldt). */
static void do_fstt(X86Access *ac, target_ulong ptr, floatx80 f)
{
    CPU_LDoubleU temp;

    temp.d = f;
    access_stq(ac, ptr, temp.l.lower);
    access_stw(ac, ptr + 8, temp.l.upper);
}

/* x87 FPU helpers */

/*
 * Convert a floatx80 to the host 'double' type by converting to
 * float64 and type-punning the bit pattern through a union.
 */
static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.f64 = floatx80_to_float64(a, &env->fp_status);
    return u.d;
}

/* Inverse of floatx80_to_double: host double -> floatx80. */
static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.d = a;
    return float64_to_floatx80(u.f64, &env->fp_status);
}

/*
 * Raise exception bits in the FPU status word; if any pending
 * exception is unmasked in FPUC, also set the error-summary and
 * busy bits.
 */
static void fpu_set_exception(CPUX86State *env, int mask)
{
    env->fpus |= mask;
    if (env->fpus & (~env->fpuc & FPUC_EM)) {
        env->fpus |= FPUS_SE | FPUS_B;
    }
}

void cpu_init_fp_statuses(CPUX86State *env)
{
    /*
     * Initialise the non-runtime-varying fields of the various
     * float_status words to x86 behaviour. This must be called at
     * CPU reset because the float_status words are in the
     * "zeroed on reset" portion of the CPU state struct.
     * Fields in float_status that vary under guest control are set
     * via the codepath for setting that register, eg cpu_set_fpuc().
     */
    /*
     * Use x87 NaN propagation rules:
     * SNaN + QNaN => return the QNaN
     * two SNaNs => return the one with the larger significand, silenced
     * two QNaNs => return the one with the larger significand
     * SNaN and a non-NaN => return the SNaN, silenced
     * QNaN and a non-NaN => return the QNaN
     *
     * If we get down to comparing significands and they are the same,
     * return the NaN with the positive sign bit (if any).
     */
    set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
    /*
     * TODO: These are incorrect: the x86 Software Developer's Manual vol 1
     * section 4.8.3.5 "Operating on SNaNs and QNaNs" says that the
     * "larger significand" behaviour is only used for x87 FPU operations.
     * For SSE the required behaviour is to always return the first NaN,
     * which is float_2nan_prop_ab.
     *
     * mmx_status is used only for the AMD 3DNow! instructions, which
     * are documented in the "3DNow! Technology Manual" as not supporting
     * NaNs or infinities as inputs. The result of passing two NaNs is
     * documented as "undefined", so we can do what we choose.
     * (Strictly there is some behaviour we don't implement correctly
     * for these "unsupported" NaN and Inf values, like "NaN * 0 == 0".)
     */
    set_float_2nan_prop_rule(float_2nan_prop_x87, &env->mmx_status);
    set_float_2nan_prop_rule(float_2nan_prop_x87, &env->sse_status);
    /*
     * Only SSE has multiply-add instructions.
In the SDM Section 14.5.2
     * "Fused-Multiply-ADD (FMA) Numeric Behavior" the NaN handling is
     * specified -- for 0 * inf + NaN the input NaN is selected, and if
     * there are multiple input NaNs they are selected in the order a, b, c.
     */
    set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->sse_status);
    set_float_3nan_prop_rule(float_3nan_prop_abc, &env->sse_status);
    /* Default NaN: sign bit set, most significant frac bit set */
    set_float_default_nan_pattern(0b11000000, &env->fp_status);
    set_float_default_nan_pattern(0b11000000, &env->mmx_status);
    set_float_default_nan_pattern(0b11000000, &env->sse_status);
}

/*
 * Save and clear the softfloat exception flags so that the flags
 * raised by the next operation can be examined in isolation.
 * Returns the previous accumulated flags; pair with
 * merge_exception_flags() below.
 */
static inline uint8_t save_exception_flags(CPUX86State *env)
{
    uint8_t old_flags = get_float_exception_flags(&env->fp_status);
    set_float_exception_flags(0, &env->fp_status);
    return old_flags;
}

/*
 * Re-raise the saved flags and translate the flags produced since
 * save_exception_flags() into x87 status-word exception bits
 * (possibly setting the unmasked-error summary via fpu_set_exception).
 */
static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
{
    uint8_t new_flags = get_float_exception_flags(&env->fp_status);
    float_raise(old_flags, &env->fp_status);
    fpu_set_exception(env,
                      ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
                       (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
                       (new_flags & float_flag_overflow ? FPUS_OE : 0) |
                       (new_flags & float_flag_underflow ? FPUS_UE : 0) |
                       (new_flags & float_flag_inexact ? FPUS_PE : 0) |
                       (new_flags & float_flag_input_denormal ?
FPUS_DE : 0)));
}

/* Division with the usual save/merge bracketing of exception flags. */
static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
{
    uint8_t old_flags = save_exception_flags(env);
    floatx80 ret = floatx80_div(a, b, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return ret;
}

/*
 * Deliver a pending x87 error: #MF exception if CR0.NE is set,
 * otherwise (system emulation only) via the legacy FERR IRQ path.
 */
static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
{
    if (env->cr[0] & CR0_NE_MASK) {
        raise_exception_ra(env, EXCP10_COPR, retaddr);
    }
#if !defined(CONFIG_USER_ONLY)
    else {
        fpu_check_raise_ferr_irq(env);
    }
#endif
}

/* Load a 32-bit float (raw bits in val) into the FT0 scratch register. */
void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.i = val;
    FT0 = float32_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* Load a 64-bit double (raw bits in val) into FT0. */
void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.i = val;
    FT0 = float64_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* Load a signed 32-bit integer into FT0 (always exact, no flag handling). */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}

/* FLD m32fp: push a 32-bit float (raw bits in val) onto the stack. */
void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float32 f;
        uint32_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

/* FLD m64fp: push a 64-bit double (raw bits in val) onto the stack. */
void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float64 f;
        uint64_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

/*
 * Temporarily force extended (64-bit significand) rounding precision,
 * returning the previous setting so the caller can restore it.
 */
static FloatX80RoundPrec tmp_maximise_precision(float_status *st)
{
    FloatX80RoundPrec old = get_floatx80_rounding_precision(st);
    set_floatx80_rounding_precision(floatx80_precision_x, st);
    return old;
}

/*
 * FILD m32int: push a 32-bit integer. Done at full precision so the
 * integer is represented exactly regardless of the current PC field.
 */
void helper_fildl_ST0(CPUX86State *env, int32_t val)
{
    int new_fpstt;
    FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */

    set_floatx80_rounding_precision(old, &env->fp_status);
}

/* FILD m64int: push a 64-bit integer, also at full precision. */
void helper_fildll_ST0(CPUX86State *env, int64_t val)
{
    int new_fpstt;
    FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */

    set_floatx80_rounding_precision(old, &env->fp_status);
}

/* FST m32fp: return ST0 converted to a 32-bit float (raw bits). */
uint32_t helper_fsts_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.f = floatx80_to_float32(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}

/* FST m64fp: return ST0 converted to a 64-bit double (raw bits). */
uint64_t helper_fstl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.f = floatx80_to_float64(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}

/*
 * FIST m16int: convert ST0 to a 16-bit integer (returned widened
 * to 32 bits); out-of-range values become the 16-bit indefinite
 * value -32768 with the invalid flag raised.
 */
int32_t helper_fist_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val =
floatx80_to_int32(ST0, &env->fp_status);
    /* Does not fit in int16_t: signal invalid, store the indefinite value */
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FIST m32int: out-of-range converts to the 32-bit indefinite 0x80000000. */
int32_t helper_fistl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FIST m64int: out-of-range converts to the 64-bit indefinite value. */
int64_t helper_fistll_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTT m16int: like FIST but always truncates (round toward zero). */
int32_t helper_fistt_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTT m32int: truncating conversion to 32-bit integer. */
int32_t helper_fisttl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTT m64int: truncating conversion to 64-bit integer. */
int64_t helper_fisttll_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FLD m80fp: push an 80-bit extended value loaded from guest memory. */
void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
    int new_fpstt;
    X86Access ac;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = do_fldt(&ac, ptr);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FSTP m80fp (store part): write ST0 as 80-bit extended to guest memory. */
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
    do_fstt(&ac, ptr, ST0);
}

void helper_fpush(CPUX86State *env)
{
    fpush(env);
}

void helper_fpop(CPUX86State *env)
{
    fpop(env);
}

/* FDECSTP: decrement TOP without tag changes; clears C0..C3. */
void helper_fdecstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fpus &= ~0x4700;
}

/* FINCSTP: increment TOP without tag changes; clears C0..C3. */
void helper_fincstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt + 1) & 7;
    env->fpus &= ~0x4700;
}

/* FPU move */

/* FFREE: mark ST(st_index) empty in the tag word. */
void helper_ffree_STN(CPUX86State *env, int st_index)
{
    env->fptags[(env->fpstt + st_index) & 7] = 1;
}

void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}

void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}

void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}

void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}

/* FXCH: exchange ST0 with ST(st_index). */
void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 tmp;

    tmp = ST(st_index);
    ST(st_index) = ST0;
    ST0 = tmp;
}

/* FPU operations */

/*
 * Condition-code bits for FCOM, indexed by FloatRelation + 1:
 * less -> C0, equal -> C3, greater -> none, unordered -> C3|C2|C0.
 */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};

/* FCOM: signaling compare of ST0 with FT0, result in C0/C2/C3. */
void helper_fcom_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    env->fpus
= (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

/* FUCOM: quiet compare (no invalid exception for QNaN operands). */
void helper_fucom_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

/*
 * EFLAGS bits for FCOMI, indexed by FloatRelation + 1:
 * less -> CF, equal -> ZF, greater -> none, unordered -> ZF|PF|CF.
 */
static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};

/* FCOMI: signaling compare with result in EFLAGS ZF/PF/CF. */
void helper_fcomi_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C);
    CC_SRC = eflags | fcomi_ccval[ret + 1];
    CC_OP = CC_OP_EFLAGS;
    merge_exception_flags(env, old_flags);
}

/* FUCOMI: quiet compare with result in EFLAGS ZF/PF/CF. */
void helper_fucomi_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C);
    CC_SRC = eflags | fcomi_ccval[ret + 1];
    CC_OP = CC_OP_EFLAGS;
    merge_exception_flags(env, old_flags);
}

void helper_fadd_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fmul_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsub_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FSUBR: reversed operand order, ST0 = FT0 - ST0. */
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}

/* FDIVR: reversed operand order, ST0 = FT0 / ST0. */
void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}

/* fp operations between STN and ST0 */

void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FSUBR: ST(n) = ST0 - ST(n). */
void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, *p, ST0);
}

/* FDIVR: ST(n) = ST0 / ST(n). */
void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, ST0, *p);
}

/* misc FPU operations */
/* FCHS: flip the sign of ST0. */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}

/* FABS: clear the sign of ST0. */
void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}

/* FLD1: push +1.0. */
void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}

/*
 * FLDL2T: load log2(10). The correctly rounded value depends on the
 * current rounding mode: round-up needs the upper neighbour.
 */
void helper_fldl2t_ST0(CPUX86State *env)
{
    switch (env->fpuc &
FPU_RC_MASK) {
    case FPU_RC_UP:
        ST0 = floatx80_l2t_u;
        break;
    default:
        ST0 = floatx80_l2t;
        break;
    }
}

/* FLDL2E: load log2(e); down/chop rounding needs the lower neighbour. */
void helper_fldl2e_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_l2e_d;
        break;
    default:
        ST0 = floatx80_l2e;
        break;
    }
}

/* FLDPI: load pi, rounding-mode dependent as above. */
void helper_fldpi_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_pi_d;
        break;
    default:
        ST0 = floatx80_pi;
        break;
    }
}

/* FLDLG2: load log10(2), rounding-mode dependent as above. */
void helper_fldlg2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_lg2_d;
        break;
    default:
        ST0 = floatx80_lg2;
        break;
    }
}

/* FLDLN2: load ln(2), rounding-mode dependent as above. */
void helper_fldln2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_ln2_d;
        break;
    default:
        ST0 = floatx80_ln2;
        break;
    }
}

/* FLDZ: push +0.0. */
void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}

void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}

/* FNSTSW: status word with the current TOP inserted in bits 13:11. */
uint32_t helper_fnstsw(CPUX86State *env)
{
    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
}

/* FNSTCW: read the control word. */
uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}

/* Map the 2-bit x86 RC field onto the softfloat rounding mode. */
static void set_x86_rounding_mode(unsigned mode, float_status *status)
{
    static FloatRoundMode x86_round_mode[4] = {
        float_round_nearest_even,
        float_round_down,
        float_round_up,
        float_round_to_zero
    };
    assert(mode < ARRAY_SIZE(x86_round_mode));
    set_float_rounding_mode(x86_round_mode[mode], status);
}

/*
 * Propagate the guest-controlled FPUC fields (rounding control and
 * precision control) into the x87 float_status.
 */
void update_fp_status(CPUX86State *env)
{
    int rnd_mode;
    FloatX80RoundPrec rnd_prec;

    /* set rounding mode */
    rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT;
    set_x86_rounding_mode(rnd_mode, &env->fp_status);

    /* Precision control (FPUC bits 9:8): 0 = single, 2 = double,
     * 3 = extended; the reserved value 1 is treated as extended. */
    switch ((env->fpuc >> 8) & 3) {
    case 0:
        rnd_prec = floatx80_precision_s;
        break;
    case 2:
        rnd_prec = floatx80_precision_d;
        break;
    case 3:
    default:
        rnd_prec = floatx80_precision_x;
        break;
    }
    set_floatx80_rounding_precision(rnd_prec, &env->fp_status);
}

/* FLDCW: write the control word (updates fp_status via cpu_set_fpuc). */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}

/*
 * FNCLEX: clear the exception flags, stack fault, error summary
 * and busy bits (keep TOP and the condition codes, bits 14:8).
 */
void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}

/* FWAIT: deliver any pending unmasked x87 error. */
void helper_fwait(CPUX86State *env)
{
    if (env->fpus & FPUS_SE) {
        fpu_raise_exception(env, GETPC());
    }
}

/* FNINIT: reset the FPU to its power-on state, all registers empty. */
static void do_fninit(CPUX86State *env)
{
    env->fpus = 0;
    env->fpstt = 0;
    env->fpcs = 0;
    env->fpds = 0;
    env->fpip = 0;
    env->fpdp = 0;
    cpu_set_fpuc(env, 0x37f); /* default control word: all exceptions masked */
    env->fptags[0] = 1;
    env->fptags[1] = 1;
    env->fptags[2] = 1;
    env->fptags[3] = 1;
    env->fptags[4] = 1;
    env->fptags[5] = 1;
    env->fptags[6] = 1;
    env->fptags[7] = 1;
}

void helper_fninit(CPUX86State *env)
{
    do_fninit(env);
}

/* BCD ops */

/*
 * FBLD: load an 80-bit packed BCD integer (9 bytes of two digits each,
 * least significant first, plus a sign byte at offset 9) and push it.
 */
void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;
    floatx80 tmp;
    uint64_t val;
    unsigned int v;
    int i;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());

    val = 0;
    for (i = 8; i >= 0; i--) {
        v = access_ldb(&ac, ptr + i);
        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    }
    tmp = int64_to_floatx80(val, &env->fp_status);
    if (access_ldb(&ac, ptr + 9) & 0x80) {
        tmp = floatx80_chs(tmp);
    }
    fpush(env);
    ST0 = tmp;
}

/* FBSTP (store part): write ST0 as an 80-bit packed BCD integer. */
void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
    uint8_t old_flags = save_exception_flags(env);
    int v;
    target_ulong mem_ref, mem_end;
    int64_t val;
    CPU_LDoubleU temp;
    X86Access ac;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
    temp.d = ST0;

    val = floatx80_to_int64(ST0, &env->fp_status);
    mem_ref = ptr;
    /* 18 decimal digits is the packed-BCD range; out-of-range values
     * store the BCD indefinite encoding and raise invalid. */
    if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        while (mem_ref < ptr + 7) {
            access_stb(&ac, mem_ref++, 0);
        }
        access_stb(&ac, mem_ref++, 0xc0);
        access_stb(&ac, mem_ref++, 0xff);
        access_stb(&ac, mem_ref++, 0xff);
        merge_exception_flags(env, old_flags);
        return;
    }
    mem_end = mem_ref + 9;
    /* Sign byte at offset 9; continue with the magnitude. */
    if (SIGND(temp)) {
        access_stb(&ac, mem_end, 0x80);
        val = -val;
    } else {
        access_stb(&ac, mem_end, 0x00);
    }
    /* Emit two decimal digits per byte, least significant first. */
    while (mem_ref < mem_end) {
        if (val == 0) {
            break;
        }
        v = val % 100;
        val = val / 100;
        v = ((v / 10) << 4) | (v % 10);
        access_stb(&ac, mem_ref++, v);
    }
    /* Zero-fill the remaining digit bytes. */
    while (mem_ref < mem_end) {
        access_stb(&ac, mem_ref++, 0);
    }
    merge_exception_flags(env, old_flags);
}

/* 128-bit significand of log(2). */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL

/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
905 */ 906 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL) 907 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL) 908 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL) 909 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL) 910 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL) 911 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL) 912 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL) 913 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL) 914 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL) 915 916 struct f2xm1_data { 917 /* 918 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1 919 * are very close to exact floatx80 values. 920 */ 921 floatx80 t; 922 /* The value of 2^t. */ 923 floatx80 exp2; 924 /* The value of 2^t - 1. */ 925 floatx80 exp2m1; 926 }; 927 928 static const struct f2xm1_data f2xm1_table[65] = { 929 { make_floatx80_init(0xbfff, 0x8000000000000000ULL), 930 make_floatx80_init(0x3ffe, 0x8000000000000000ULL), 931 make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, 932 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), 933 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), 934 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, 935 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), 936 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), 937 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, 938 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), 939 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), 940 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, 941 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), 942 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), 943 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, 944 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), 945 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), 946 make_floatx80_init(0xbffd, 
0xe2b78ce97464fdd4ULL) }, 947 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), 948 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), 949 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, 950 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), 951 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), 952 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, 953 { make_floatx80_init(0xbffe, 0xc000000000006530ULL), 954 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), 955 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, 956 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), 957 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), 958 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, 959 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), 960 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), 961 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, 962 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), 963 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), 964 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, 965 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), 966 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), 967 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, 968 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), 969 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), 970 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, 971 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), 972 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), 973 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, 974 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), 975 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), 976 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, 977 { make_floatx80_init(0xbffe, 0x800000000000227dULL), 978 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), 979 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, 980 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), 981 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), 
982 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, 983 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), 984 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), 985 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, 986 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), 987 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), 988 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, 989 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), 990 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), 991 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, 992 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL), 993 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), 994 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, 995 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), 996 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), 997 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, 998 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), 999 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL), 1000 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, 1001 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), 1002 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), 1003 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, 1004 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), 1005 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), 1006 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, 1007 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), 1008 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), 1009 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, 1010 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), 1011 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), 1012 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, 1013 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), 1014 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), 1015 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, 1016 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL), 1017 
make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), 1018 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, 1019 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), 1020 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), 1021 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, 1022 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), 1023 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), 1024 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, 1025 { floatx80_zero_init, 1026 make_floatx80_init(0x3fff, 0x8000000000000000ULL), 1027 floatx80_zero_init }, 1028 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), 1029 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), 1030 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, 1031 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), 1032 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), 1033 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) }, 1034 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), 1035 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), 1036 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, 1037 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), 1038 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), 1039 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, 1040 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), 1041 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), 1042 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, 1043 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), 1044 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), 1045 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, 1046 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), 1047 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), 1048 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, 1049 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), 1050 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), 1051 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, 1052 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), 
1053 make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL), 1054 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, 1055 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), 1056 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), 1057 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, 1058 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), 1059 make_floatx80_init(0x3fff, 0xa27043030c49370aULL), 1060 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, 1061 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), 1062 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), 1063 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, 1064 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), 1065 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), 1066 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, 1067 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), 1068 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), 1069 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, 1070 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), 1071 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), 1072 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, 1073 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL), 1074 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), 1075 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, 1076 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), 1077 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), 1078 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, 1079 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), 1080 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), 1081 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, 1082 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), 1083 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), 1084 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, 1085 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), 1086 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), 1087 make_floatx80_init(0x3ffe, 
0x8ace5422aa0cab86ULL) }, 1088 { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL), 1089 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), 1090 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, 1091 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), 1092 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), 1093 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, 1094 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), 1095 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), 1096 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, 1097 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), 1098 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), 1099 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, 1100 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), 1101 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), 1102 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, 1103 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), 1104 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL), 1105 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) }, 1106 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL), 1107 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL), 1108 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) }, 1109 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL), 1110 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL), 1111 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) }, 1112 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL), 1113 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL), 1114 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) }, 1115 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL), 1116 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL), 1117 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) }, 1118 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL), 1119 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL), 1120 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) }, 1121 { make_floatx80_init(0x3fff, 0x8000000000000000ULL), 1122 
      make_floatx80_init(0x4000, 0x8000000000000000ULL),
      make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
};

/*
 * F2XM1: replace ST0 with 2^ST0 - 1.
 *
 * Arguments with |ST0| > 1 are out of range and produce the default
 * NaN with the invalid flag raised.  +1/-1 give the exact results
 * 1/-0.5.  Very small arguments are handled by multiplying by an
 * extra-precision log(2).  Otherwise the argument is split as t + y,
 * where t = n/32 is the nearest multiple of 1/32 (looked up in
 * f2xm1_table), and a polynomial approximation in y is combined with
 * the tabulated 2^t and 2^t - 1 values.
 */
void helper_f2xm1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t sig = extractFloatx80Frac(ST0);
    int32_t exp = extractFloatx80Exp(ST0);
    bool sign = extractFloatx80Sign(ST0);

    if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        /* Quiet NaNs pass through; signaling NaNs are silenced. */
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (exp > 0x3fff ||
               (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
        /* Out of range for the instruction, treat as invalid. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (exp == 0x3fff) {
        /* Argument 1 or -1, exact result 1 or -0.5. */
        if (sign) {
            ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
        }
    } else if (exp < 0x3fb0) {
        if (!floatx80_is_zero(ST0)) {
            /*
             * Multiplying the argument by an extra-precision version
             * of log(2) is sufficiently precise.  Zero arguments are
             * returned unchanged.
             */
            uint64_t sig0, sig1, sig2;
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
                            &sig2);
            /* This result is inexact.  */
            sig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig0, sig1,
                                                &env->fp_status);
        }
    } else {
        floatx80 tmp, y, accum;
        bool asign, bsign;
        int32_t n, aexp, bexp;
        uint64_t asig0, asig1, asig2, bsig0, bsig1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        /* Intermediate computations use full precision, round-to-nearest. */
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        /* Find the nearest multiple of 1/32 to the argument. */
        tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
        /* Bias by 32 so the table index is non-negative for ST0 in [-1, 1]. */
        n = 32 + floatx80_to_int32(tmp, &env->fp_status);
        y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);

        if (floatx80_is_zero(y)) {
            /*
             * Use the value of 2^t - 1 from the table, to avoid
             * needing to special-case zero as a result of
             * multiplication below.
             */
            ST0 = f2xm1_table[n].t;
            /*
             * Deliberately overwrites (not ORs) the flags accumulated
             * above, so only "inexact" survives from this operation;
             * the caller's earlier flags come back via
             * merge_exception_flags() below.
             */
            set_float_exception_flags(float_flag_inexact, &env->fp_status);
            env->fp_status.float_rounding_mode = save_mode;
        } else {
            /*
             * Compute the lower parts of a polynomial expansion for
             * (2^y - 1) / y.
             */
            accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);

            /*
             * The full polynomial expansion is f2xm1_coeff_0 + accum
             * (where accum has much lower magnitude, and so, in
             * particular, carry out of the addition is not possible).
             * (This expansion is only accurate to about 70 bits, not
             * 128 bits.)
             */
            aexp = extractFloatx80Exp(f2xm1_coeff_0);
            asign = extractFloatx80Sign(f2xm1_coeff_0);
            shift128RightJamming(extractFloatx80Frac(accum), 0,
                                 aexp - extractFloatx80Exp(accum),
                                 &asig0, &asig1);
            bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
            bsig1 = 0;
            if (asign == extractFloatx80Sign(accum)) {
                add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            } else {
                sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            }
            /* And thus compute an approximation to 2^y - 1. */
            mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
                            &asig0, &asig1, &asig2);
            aexp += extractFloatx80Exp(y) - 0x3ffe;
            asign ^= extractFloatx80Sign(y);
            if (n != 32) {
                /*
                 * Multiply this by the precomputed value of 2^t and
                 * add that of 2^t - 1.
                 */
                mul128By64To192(asig0, asig1,
                                extractFloatx80Frac(f2xm1_table[n].exp2),
                                &asig0, &asig1, &asig2);
                aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
                bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
                bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
                bsig1 = 0;
                /* Align the two addends to a common exponent. */
                if (bexp < aexp) {
                    shift128RightJamming(bsig0, bsig1, aexp - bexp,
                                         &bsig0, &bsig1);
                } else if (aexp < bexp) {
                    shift128RightJamming(asig0, asig1, bexp - aexp,
                                         &asig0, &asig1);
                    aexp = bexp;
                }
                /* The sign of 2^t - 1 is always that of the result. */
                bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
                if (asign == bsign) {
                    /* Avoid possible carry out of the addition. */
                    shift128RightJamming(asig0, asig1, 1,
                                         &asig0, &asig1);
                    shift128RightJamming(bsig0, bsig1, 1,
                                         &bsig0, &bsig1);
                    ++aexp;
                    add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
                } else {
                    sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
                    asign = bsign;
                }
            }
            env->fp_status.float_rounding_mode = save_mode;
            /* This result is inexact.  */
            asig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                asign, aexp, asig0, asig1,
                                                &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    merge_exception_flags(env, old_flags);
}

/*
 * FPTAN: replace ST0 with tan(ST0) and push 1.0, clearing C2; if the
 * argument is out of range (|ST0| > MAXTAN, i.e. 2^63), set C2 and
 * leave ST0 unchanged.  Computed via the host's double-precision
 * tan(), so the result has only double precision, not the full
 * extended precision of a real x87.
 */
void helper_fptan(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        fptemp = tan(fptemp);
        ST0 = double_to_floatx80(env, fptemp);
        fpush(env);
        ST0 = floatx80_one;
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**52 only */
    }
}

/* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.
 */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL

/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)

struct fpatan_data {
    /* High and low parts of atan(x). */
    floatx80 atan_high, atan_low;
};

/*
 * Table entry n holds atan(n/8), split into high and low parts, for
 * n = 0..8; used by helper_fpatan after it splits its reduced
 * argument as t + y with t = n/8.
 */
static const struct fpatan_data fpatan_table[9] = {
    { floatx80_zero_init,
      floatx80_zero_init },
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};

/*
 * FPATAN: compute ST1 = arctan(ST1 / ST0), using the signs of both
 * operands to place the result in the correct quadrant (pi, pi/2,
 * 3pi/4 and pi/4 adjustments below), then pop the stack.  NaN and
 * invalid-encoding operands propagate/raise invalid first.
 */
void helper_fpatan(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
        /* Pass this zero through. */
    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
                arg0_exp - arg1_exp >= 80) &&
               !arg0_sign) {
        /*
         * Dividing ST1 by ST0 gives the correct result up to
         * rounding, and avoids spurious underflow exceptions that
         * might result from passing some small values through the
         * polynomial approximation, but if a finite nonzero result of
         * division is exact, the result of fpatan is still inexact
         * (and underflowing where appropriate).
         */
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
        if (!floatx80_is_zero(ST1) &&
            !(get_float_exception_flags(&env->fp_status) &
              float_flag_inexact)) {
            /*
             * The mathematical result is very slightly closer to zero
             * than this exact result.  Round a value with the
             * significand adjusted accordingly to get the correct
             * exceptions, and possibly an adjusted result depending
             * on the rounding mode.
             */
            uint64_t sig = extractFloatx80Frac(ST1);
            int32_t exp = extractFloatx80Exp(ST1);
            bool sign = extractFloatx80Sign(ST1);
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig - 1,
                                                -1, &env->fp_status);
        }
    } else {
        /* The result is inexact.  */
        bool rsign = arg1_sign;
        int32_t rexp;
        uint64_t rsig0, rsig1;
        if (floatx80_is_zero(ST1)) {
            /*
             * ST0 is negative.  The result is pi with the sign of
             * ST1.
             */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else if (floatx80_is_infinity(ST1)) {
            if (floatx80_is_infinity(ST0)) {
                /* inf/inf: 3pi/4 (ST0 < 0) or pi/4 (ST0 > 0). */
                if (arg0_sign) {
                    rexp = pi_34_exp;
                    rsig0 = pi_34_sig_high;
                    rsig1 = pi_34_sig_low;
                } else {
                    rexp = pi_4_exp;
                    rsig0 = pi_4_sig_high;
                    rsig1 = pi_4_sig_low;
                }
            } else {
                rexp = pi_2_exp;
                rsig0 = pi_2_sig_high;
                rsig1 = pi_2_sig_low;
            }
        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
            rexp = pi_2_exp;
            rsig0 = pi_2_sig_high;
            rsig1 = pi_2_sig_low;
        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
            /* ST0 is negative.  */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else {
            /*
             * ST0 and ST1 are finite, nonzero and with exponents not
             * too far apart.
             */
            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
            int32_t azexp, axexp;
            bool adj_sub, ysign, zsign;
            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
            uint64_t azsig0, azsig1;
            uint64_t azsig2, azsig3, axsig0, axsig1;
            floatx80 x8;
            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
            FloatX80RoundPrec save_prec =
                env->fp_status.floatx80_rounding_precision;
            /* Intermediate computations use full precision, nearest-even. */
            env->fp_status.float_rounding_mode = float_round_nearest_even;
            env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

            if (arg0_exp == 0) {
                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
            }
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            /* Arrange so that the ratio computed below is at most 1. */
            if (arg0_exp > arg1_exp ||
                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
                /* Work with abs(ST1) / abs(ST0).  */
                num_exp = arg1_exp;
                num_sig = arg1_sig;
                den_exp = arg0_exp;
                den_sig = arg0_sig;
                if (arg0_sign) {
                    /* The result is subtracted from pi.  */
                    adj_exp = pi_exp;
                    adj_sig0 = pi_sig_high;
                    adj_sig1 = pi_sig_low;
                    adj_sub = true;
                } else {
                    /* The result is used as-is.  */
                    adj_exp = 0;
                    adj_sig0 = 0;
                    adj_sig1 = 0;
                    adj_sub = false;
                }
            } else {
                /* Work with abs(ST0) / abs(ST1).  */
                num_exp = arg0_exp;
                num_sig = arg0_sig;
                den_exp = arg1_exp;
                den_sig = arg1_sig;
                /* The result is added to or subtracted from pi/2.  */
                adj_exp = pi_2_exp;
                adj_sig0 = pi_2_sig_high;
                adj_sig1 = pi_2_sig_low;
                adj_sub = !arg0_sign;
            }

            /*
             * Compute x = num/den, where 0 < x <= 1 and x is not too
             * small.
             */
            xexp = num_exp - den_exp + 0x3ffe;
            remsig0 = num_sig;
            remsig1 = 0;
            if (den_sig <= remsig0) {
                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                ++xexp;
            }
            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
            mul64To128(den_sig, xsig0, &msig0, &msig1);
            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
            /* Correct any overestimate from estimateDiv128To64. */
            while ((int64_t) remsig0 < 0) {
                --xsig0;
                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
            }
            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
            /*
             * No need to correct any estimation error in xsig1; even
             * with such error, it is accurate enough.
             */

            /*
             * Split x as x = t + y, where t = n/8 is the nearest
             * multiple of 1/8 to x.
             */
            x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                               false, xexp + 3, xsig0,
                                               xsig1, &env->fp_status);
            n = floatx80_to_int32(x8, &env->fp_status);
            if (n == 0) {
                ysign = false;
                yexp = xexp;
                ysig0 = xsig0;
                ysig1 = xsig1;
                texp = 0;
                tsig = 0;
            } else {
                int shift = clz32(n) + 32;
                texp = 0x403b - shift;
                tsig = n;
                tsig <<= shift;
                if (texp == xexp) {
                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
                    if ((int64_t) ysig0 >= 0) {
                        ysign = false;
                        if (ysig0 == 0) {
                            if (ysig1 == 0) {
                                /* y is exactly zero. */
                                yexp = 0;
                            } else {
                                shift = clz64(ysig1) + 64;
                                yexp = xexp - shift;
                                shift128Left(ysig0, ysig1, shift,
                                             &ysig0, &ysig1);
                            }
                        } else {
                            shift = clz64(ysig0);
                            yexp = xexp - shift;
                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                        }
                    } else {
                        ysign = true;
                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
                        if (ysig0 == 0) {
                            shift = clz64(ysig1) + 64;
                        } else {
                            shift = clz64(ysig0);
                        }
                        yexp = xexp - shift;
                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                    }
                } else {
                    /*
                     * t's exponent must be greater than x's because t
                     * is positive and the nearest multiple of 1/8 to
                     * x, and if x has a greater exponent, the power
                     * of 2 with that exponent is also a multiple of
                     * 1/8.
                     */
                    uint64_t usig0, usig1;
                    shift128RightJamming(xsig0, xsig1, texp - xexp,
                                         &usig0, &usig1);
                    ysign = true;
                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
                    if (ysig0 == 0) {
                        shift = clz64(ysig1) + 64;
                    } else {
                        shift = clz64(ysig0);
                    }
                    yexp = texp - shift;
                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                }
            }

            /*
             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
             * arctan(z).
             */
            zsign = ysign;
            if (texp == 0 || yexp == 0) {
                /* t or y is zero, so z is just y. */
                zexp = yexp;
                zsig0 = ysig0;
                zsig1 = ysig1;
            } else {
                /*
                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
                 */
                int32_t dexp = texp + xexp - 0x3ffe;
                uint64_t dsig0, dsig1, dsig2;
                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
                /*
                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
                 * bit).  Add 1 to produce the denominator 1+tx.
                 */
                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
                                     &dsig0, &dsig1);
                dsig0 |= 0x8000000000000000ULL;
                zexp = yexp - 1;
                remsig0 = ysig0;
                remsig1 = ysig1;
                remsig2 = 0;
                if (dsig0 <= remsig0) {
                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                    ++zexp;
                }
                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
                       &remsig0, &remsig1, &remsig2);
                /* Correct any overestimate from estimateDiv128To64. */
                while ((int64_t) remsig0 < 0) {
                    --zsig0;
                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
                           &remsig0, &remsig1, &remsig2);
                }
                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
                /* No need to correct any estimation error in zsig1.  */
            }

            if (zexp == 0) {
                /* z is zero, so arctan(z) is zero. */
                azexp = 0;
                azsig0 = 0;
                azsig1 = 0;
            } else {
                floatx80 z2, accum;
                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
                /* Compute z^2.  */
                mul128To256(zsig0, zsig1, zsig0, zsig1,
                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
                z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                                   zexp + zexp - 0x3ffe,
                                                   z2sig0, z2sig1,
                                                   &env->fp_status);

                /* Compute the lower parts of the polynomial expansion.  */
                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);

                /*
                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
                 */
                aexp = extractFloatx80Exp(fpatan_coeff_0);
                shift128RightJamming(extractFloatx80Frac(accum), 0,
                                     aexp - extractFloatx80Exp(accum),
                                     &asig0, &asig1);
                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
                       &asig0, &asig1);
                /* Multiply by z to compute arctan(z).  */
                azexp = aexp + zexp - 0x3ffe;
                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
                            &azsig2, &azsig3);
            }

            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign).  */
            if (texp == 0) {
                /* z is positive.  */
                axexp = azexp;
                axsig0 = azsig0;
                axsig1 = azsig1;
            } else {
                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
                uint64_t low_sig0 =
                    extractFloatx80Frac(fpatan_table[n].atan_low);
                uint64_t low_sig1 = 0;
                /* Combine the high and low parts of atan(n/8). */
                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
                axsig1 = 0;
                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
                                     &low_sig0, &low_sig1);
                if (low_sign) {
                    sub128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                }
                /* Align exponents, with one extra bit of headroom. */
                if (azexp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
                                         &axsig0, &axsig1);
                    axexp = azexp + 1;
                    shift128RightJamming(azsig0, azsig1, 1,
                                         &azsig0, &azsig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
                                         &azsig0, &azsig1);
                    ++axexp;
                }
                if (zsign) {
                    sub128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                }
            }

            if (adj_exp == 0) {
                rexp = axexp;
                rsig0 = axsig0;
                rsig1 = axsig1;
            } else {
                /*
                 * Add or subtract arctan(x) (exponent axexp,
                 * significand axsig0 and axsig1, positive, not
                 * necessarily normalized) to the number given by
                 * adj_exp, adj_sig0 and adj_sig1, according to
                 * adj_sub.
                 */
                if (adj_exp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
                                         &axsig0, &axsig1);
                    rexp = adj_exp + 1;
                    shift128RightJamming(adj_sig0, adj_sig1, 1,
                                         &adj_sig0, &adj_sig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(adj_sig0, adj_sig1,
                                         axexp - adj_exp + 1,
                                         &adj_sig0, &adj_sig1);
                    rexp = axexp + 1;
                }
                if (adj_sub) {
                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                } else {
                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                }
            }

            env->fp_status.float_rounding_mode = save_mode;
            env->fp_status.floatx80_rounding_precision = save_prec;
        }
        /* This result is inexact.  */
        rsig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
                                            rsig0, rsig1, &env->fp_status);
    }

    fpop(env);
    merge_exception_flags(env, old_flags);
}

/*
 * FXTRACT: split ST0 into exponent and significand.  After the push,
 * ST1 holds the unbiased exponent as a floatx80 and ST0 holds the
 * significand with its exponent rebiased to EXPBIAS (a value in
 * [1, 2)).  Zero yields -inf (raising divide-by-zero) plus the zero;
 * NaNs, invalid encodings and infinities are handled specially.
 */
void helper_fxtract(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    CPU_LDoubleU temp;

    temp.d = ST0;

    if (floatx80_is_zero(ST0)) {
        /* Easy way to generate -inf and raising division by 0 exception */
        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
                           &env->fp_status);
        fpush(env);
        ST0 = temp.d;
    } else if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
        fpush(env);
        /* Both results are the (silenced) NaN. */
        ST0 = ST1;
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_infinity(ST0)) {
        fpush(env);
        ST0 = ST1;
        ST1 = floatx80_infinity;
    } else {
        int expdif;

        if (EXPD(temp) ==
0) {
            /* Pseudo-denormal: normalize and account for the shift. */
            int shift = clz64(temp.l.lower);
            temp.l.lower <<= shift;
            expdif = 1 - EXPBIAS - shift;
            float_raise(float_flag_input_denormal, &env->fp_status);
        } else {
            expdif = EXPD(temp) - EXPBIAS;
        }
        /* DP exponent bias */
        ST0 = int32_to_floatx80(expdif, &env->fp_status);
        fpush(env);
        BIASEXPONENT(temp);
        ST0 = temp.d;
    }
    merge_exception_flags(env, old_flags);
}

/*
 * Common code for FPREM (mod = true, truncating quotient) and FPREM1
 * (mod = false, round-to-nearest quotient): replace ST0 with the
 * partial remainder of ST0 / ST1.  When the exponents differ by less
 * than 64 the remainder is computed in one step and the low three
 * quotient bits are reported in C0/C3/C1; otherwise only a partial
 * reduction is done and C2 is set to tell the guest to iterate.
 */
static void helper_fprem_common(CPUX86State *env, bool mod)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t quotient;
    CPU_LDoubleU temp0, temp1;
    int exp0, exp1, expdiff;

    temp0.d = ST0;
    temp1.d = ST1;
    exp0 = EXPD(temp0);
    exp1 = EXPD(temp1);

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
        exp0 == 0x7fff || exp1 == 0x7fff ||
        floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
        /* Special cases: let floatx80_modrem produce NaN/exceptions. */
        ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
    } else {
        /* Effective exponents, treating pseudo-denormals as normalized. */
        if (exp0 == 0) {
            exp0 = 1 - clz64(temp0.l.lower);
        }
        if (exp1 == 0) {
            exp1 = 1 - clz64(temp1.l.lower);
        }
        expdiff = exp0 - exp1;
        if (expdiff < 64) {
            ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
            env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */
            env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
            env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */
        } else {
            /*
             * Partial remainder.  This choice of how many bits to
             * process at once is specified in AMD instruction set
             * manuals, and empirically is followed by Intel
             * processors as well; it ensures that the final remainder
             * operation in a loop does produce the correct low three
             * bits of the quotient.  AMD manuals specify that the
             * flags other than C2 are cleared, and empirically Intel
             * processors clear them as well.
             */
            int n = 32 + (expdiff % 32);
            temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
            ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
            env->fpus |= 0x400; /* C2 <-- 1 */
        }
    }
    merge_exception_flags(env, old_flags);
}

/* FPREM1: IEEE remainder (round-to-nearest quotient). */
void helper_fprem1(CPUX86State *env)
{
    helper_fprem_common(env, false);
}

/* FPREM: x87 partial remainder (truncating quotient). */
void helper_fprem(CPUX86State *env)
{
    helper_fprem_common(env, true);
}

/* 128-bit significand of log2(e).  */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)

/*
 * Compute an approximation of log2(1+arg), where 1+arg is in the
 * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
 * function is called, rounding precision is set to 80 and the
 * round-to-nearest mode is in effect.
arg must not be exactly zero, 1932 * and must not be so close to zero that underflow might occur. 1933 */ 1934 static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, 1935 uint64_t *sig0, uint64_t *sig1) 1936 { 1937 uint64_t arg0_sig = extractFloatx80Frac(arg); 1938 int32_t arg0_exp = extractFloatx80Exp(arg); 1939 bool arg0_sign = extractFloatx80Sign(arg); 1940 bool asign; 1941 int32_t dexp, texp, aexp; 1942 uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2; 1943 uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3; 1944 uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1; 1945 floatx80 t2, accum; 1946 1947 /* 1948 * Compute an approximation of arg/(2+arg), with extra precision, 1949 * as the argument to a polynomial approximation. The extra 1950 * precision is only needed for the first term of the 1951 * approximation, with subsequent terms being significantly 1952 * smaller; the approximation only uses odd exponents, and the 1953 * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029.... 
1954 */ 1955 if (arg0_sign) { 1956 dexp = 0x3fff; 1957 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 1958 sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1); 1959 } else { 1960 dexp = 0x4000; 1961 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 1962 dsig0 |= 0x8000000000000000ULL; 1963 } 1964 texp = arg0_exp - dexp + 0x3ffe; 1965 rsig0 = arg0_sig; 1966 rsig1 = 0; 1967 rsig2 = 0; 1968 if (dsig0 <= rsig0) { 1969 shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1); 1970 ++texp; 1971 } 1972 tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0); 1973 mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2); 1974 sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2, 1975 &rsig0, &rsig1, &rsig2); 1976 while ((int64_t) rsig0 < 0) { 1977 --tsig0; 1978 add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1, 1979 &rsig0, &rsig1, &rsig2); 1980 } 1981 tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0); 1982 /* 1983 * No need to correct any estimation error in tsig1; even with 1984 * such error, it is accurate enough. Now compute the square of 1985 * that approximation. 1986 */ 1987 mul128To256(tsig0, tsig1, tsig0, tsig1, 1988 &t2sig0, &t2sig1, &t2sig2, &t2sig3); 1989 t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false, 1990 texp + texp - 0x3ffe, 1991 t2sig0, t2sig1, &env->fp_status); 1992 1993 /* Compute the lower parts of the polynomial expansion. 
*/ 1994 accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status); 1995 accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status); 1996 accum = floatx80_mul(accum, t2, &env->fp_status); 1997 accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status); 1998 accum = floatx80_mul(accum, t2, &env->fp_status); 1999 accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status); 2000 accum = floatx80_mul(accum, t2, &env->fp_status); 2001 accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status); 2002 accum = floatx80_mul(accum, t2, &env->fp_status); 2003 accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status); 2004 accum = floatx80_mul(accum, t2, &env->fp_status); 2005 accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status); 2006 accum = floatx80_mul(accum, t2, &env->fp_status); 2007 accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status); 2008 accum = floatx80_mul(accum, t2, &env->fp_status); 2009 accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status); 2010 accum = floatx80_mul(accum, t2, &env->fp_status); 2011 accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status); 2012 2013 /* 2014 * The full polynomial expansion is fyl2x_coeff_0 + accum (where 2015 * accum has much lower magnitude, and so, in particular, carry 2016 * out of the addition is not possible), multiplied by t. (This 2017 * expansion is only accurate to about 70 bits, not 128 bits.) 2018 */ 2019 aexp = extractFloatx80Exp(fyl2x_coeff_0); 2020 asign = extractFloatx80Sign(fyl2x_coeff_0); 2021 shift128RightJamming(extractFloatx80Frac(accum), 0, 2022 aexp - extractFloatx80Exp(accum), 2023 &asig0, &asig1); 2024 bsig0 = extractFloatx80Frac(fyl2x_coeff_0); 2025 bsig1 = 0; 2026 if (asign == extractFloatx80Sign(accum)) { 2027 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 2028 } else { 2029 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 2030 } 2031 /* Multiply by t to compute the required result. 
     */
    mul128To256(asig0, asig1, tsig0, tsig1,
                &asig0, &asig1, &asig2, &asig3);
    aexp += texp - 0x3ffe;
    *exp = aexp;
    *sig0 = asig0;
    *sig1 = asig1;
}

/* FYL2XP1: replace ST1 with ST1 * log2(ST0 + 1), then pop ST0. */
void helper_fyl2xp1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_exp > 0x3ffd ||
               (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
                                                  0x95f619980c4336f7ULL :
                                                  0xd413cccfe7799211ULL))) {
        /*
         * Out of range for the instruction (ST0 must have absolute
         * value less than 1 - sqrt(2)/2 = 0.292..., according to
         * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
         * to sqrt(2) - 1, which we allow here), treat as invalid.
         */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
               arg1_exp == 0x7fff) {
        /*
         * One argument is zero, or multiplying by infinity; correct
         * result is exact and can be obtained by multiplying the
         * arguments.
         */
        ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
    } else if (arg0_exp < 0x3fb0) {
        /*
         * Multiplying both arguments and an extra-precision version
         * of log2(e) is sufficiently precise.
         */
        uint64_t sig0, sig1, sig2;
        int32_t exp;
        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
                        &sig0, &sig1, &sig2);
        exp = arg0_exp + 1;
        mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
        exp += arg1_exp - 0x3ffe;
        /* This result is inexact.  */
        sig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, exp,
                                            sig0, sig1, &env->fp_status);
    } else {
        int32_t aexp;
        uint64_t asig0, asig1, asig2;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        /* The polynomial evaluation requires round-to-nearest at 80 bits. */
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
        /*
         * Multiply by the second argument to compute the required
         * result.
         */
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
        aexp += arg1_exp - 0x3ffe;
        /* This result is inexact.
         */
        asig1 |= 1;
        env->fp_status.float_rounding_mode = save_mode;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, aexp,
                                            asig0, asig1, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}

/* FYL2X: replace ST1 with ST1 * log2(ST0), then pop ST0. */
void helper_fyl2x(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_sign && !floatx80_is_zero(ST0)) {
        /* log2 of a negative number is invalid. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_infinity(ST1)) {
        FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
                                             &env->fp_status);
        switch (cmp) {
        case float_relation_less:
            ST1 = floatx80_chs(ST1);
            break;
        case float_relation_greater:
            /* Result is infinity of the same sign as ST1.  */
            break;
        default:
            /* ST0 == 1: inf * 0 is invalid. */
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
            break;
        }
    } else if (floatx80_is_infinity(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else if (arg1_sign) {
            ST1 = floatx80_chs(ST0);
        } else {
            ST1 = ST0;
        }
    } else if (floatx80_is_zero(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else {
            /* Result is infinity with opposite sign to ST1.  */
            float_raise(float_flag_divbyzero, &env->fp_status);
            ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
                                0x8000000000000000ULL);
        }
    } else if (floatx80_is_zero(ST1)) {
        if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
            ST1 = floatx80_chs(ST1);
        }
        /* Otherwise, ST1 is already the correct result.  */
    } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
        /* log2(1) == 0, with the sign of the zero taken from ST1. */
        if (arg1_sign) {
            ST1 = floatx80_chs(floatx80_zero);
        } else {
            ST1 = floatx80_zero;
        }
    } else {
        int32_t int_exp;
        floatx80 arg0_m1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        /* The polynomial evaluation requires round-to-nearest at 80 bits. */
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        /* Split ST0 into 2^int_exp * m with m in [sqrt(2)/2, sqrt(2)). */
        int_exp = arg0_exp - 0x3fff;
        if (arg0_sig > 0xb504f333f9de6484ULL) {
            ++int_exp;
        }
        arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
                                               &env->fp_status),
                               floatx80_one, &env->fp_status);
        if (floatx80_is_zero(arg0_m1)) {
            /* Exact power of 2; multiply by ST1.  */
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
                               ST1, &env->fp_status);
        } else {
            bool asign = extractFloatx80Sign(arg0_m1);
            int32_t aexp;
            uint64_t asig0, asig1, asig2;
            helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
            if (int_exp != 0) {
                /* Add the integer part int_exp to the fractional log2. */
                bool isign = (int_exp < 0);
                int32_t iexp;
                uint64_t isig;
                int shift;
                int_exp = isign ? -int_exp : int_exp;
                shift = clz32(int_exp) + 32;
                isig = int_exp;
                isig <<= shift;
                iexp = 0x403e - shift;
                shift128RightJamming(asig0, asig1, iexp - aexp,
                                     &asig0, &asig1);
                if (asign == isign) {
                    add128(isig, 0, asig0, asig1, &asig0, &asig1);
                } else {
                    sub128(isig, 0, asig0, asig1, &asig0, &asig1);
                }
                aexp = iexp;
                asign = isign;
            }
            /*
             * Multiply by the second argument to compute the required
             * result.
             */
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
            aexp += arg1_exp - 0x3ffe;
            /* This result is inexact.  */
            asig1 |= 1;
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                asign ^ arg1_sign, aexp,
                                                asig0, asig1, &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}

/* FSQRT: ST0 <- sqrt(ST0); C1 is cleared for a negative operand. */
void helper_fsqrt(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    if (floatx80_is_neg(ST0)) {
        env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
        env->fpus |= 0x400;
    }
    ST0 = floatx80_sqrt(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FSINCOS: push cos(ST0) above sin(ST0), via host double precision. */
void helper_fsincos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        /* Operand out of range: leave ST0 alone and set C2. */
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        fpush(env);
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400;  /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}

/* FRNDINT: round ST0 to an integer using the current rounding mode. */
void helper_frndint(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FSCALE: ST0 <- ST0 * 2^trunc(ST1), with x87 special-case handling. */
void helper_fscale(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST1)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
        }
        ST0 = ST1;
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (floatx80_is_infinity(ST1) &&
               !floatx80_invalid_encoding(ST0) &&
               !floatx80_is_any_nan(ST0)) {
        if (floatx80_is_neg(ST1)) {
            /* Scaling by -inf drives finite values to zero. */
            if (floatx80_is_infinity(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_zero) :
                       floatx80_zero);
            }
        } else {
            /* Scaling by +inf drives nonzero values to infinity. */
            if (floatx80_is_zero(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_infinity) :
                       floatx80_infinity);
            }
        }
    } else {
        int n;
        FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision;
        uint8_t save_flags = get_float_exception_flags(&env->fp_status);
        /* The int conversion must not leak exception flags. */
        set_float_exception_flags(0, &env->fp_status);
        n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
        set_float_exception_flags(save_flags, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save;
    }
    merge_exception_flags(env, old_flags);
}

/* FSIN: ST0 <- sin(ST0), via host double precision. */
void helper_fsin(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        /* Operand out of range: leave ST0 alone and set C2. */
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        env->fpus &= ~0x400;  /* C2 <-- 0 */
        /* the above code is for |arg| < 2**53 only */
    }
}

/* FCOS: ST0 <- cos(ST0), via host double precision. */
void helper_fcos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        /* Operand out of range: leave ST0 alone and set C2. */
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400;  /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}

/* FXAM: classify ST0 into the C3/C2/C0 condition codes; C1 gets the sign. */
void helper_fxam_ST0(CPUX86State *env)
{
    CPU_LDoubleU temp;
    int expdif;

    temp.d = ST0;

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (SIGND(temp)) {
        env->fpus |= 0x200; /* C1 <-- 1 */
    }

    if (env->fptags[env->fpstt]) {
        env->fpus |= 0x4100; /* Empty */
        return;
    }

    expdif = EXPD(temp);
    if (expdif == MAXEXPD) {
        if (MANTD(temp) == 0x8000000000000000ULL) {
            env->fpus |= 0x500; /* Infinity */
        } else if (MANTD(temp) & 0x8000000000000000ULL) {
            env->fpus |= 0x100; /* NaN */
        }
    } else if (expdif == 0) {
        if (MANTD(temp) == 0) {
            env->fpus |= 0x4000; /* Zero */
        } else {
            env->fpus |= 0x4400; /* Denormal */
        }
    } else if (MANTD(temp) & 0x8000000000000000ULL) {
        /* Normal number (integer bit set, non-extreme exponent). */
        env->fpus |= 0x400;
    }
}

/*
 * Store the FPU environment (control/status/tag words plus instruction
 * and data pointers) at ptr; data32 selects the 32-bit vs 16-bit layout.
 */
static void do_fstenv(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    /* Rebuild the 2-bit-per-register tag word from the boolean tags. */
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       || (mant & (1LL << 63)) == 0) {
                /* NaNs, infinity, denormal */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        access_stl(ac, ptr, env->fpuc);
        access_stl(ac, ptr + 4, fpus);
        access_stl(ac, ptr + 8, fptag);
        access_stl(ac, ptr + 12, env->fpip); /* fpip */
        access_stl(ac, ptr + 16, env->fpcs); /* fpcs */
        access_stl(ac, ptr + 20, env->fpdp); /* fpoo */
        access_stl(ac, ptr + 24, env->fpds); /* fpos */
    } else {
        /* 16 bit */
        access_stw(ac, ptr, env->fpuc);
        access_stw(ac, ptr + 2, fpus);
        access_stw(ac, ptr + 4, fptag);
        access_stw(ac, ptr + 6, env->fpip);
        access_stw(ac, ptr + 8, env->fpcs);
        access_stw(ac, ptr + 10, env->fpdp);
        access_stw(ac, ptr + 12, env->fpds);
    }
}

void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
    X86Access ac;

    access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC());
    do_fstenv(&ac, ptr, data32);
}

/* Load a new FPU status word, extracting TOP and updating the busy flag. */
static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
{
    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800 & ~FPUS_B;
    env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
#if !defined(CONFIG_USER_ONLY)
    if (!(env->fpus & FPUS_SE)) {
        /*
         * Here the processor deasserts FERR#; in response, the chipset deasserts
         * IGNNE#.
2496 */ 2497 cpu_clear_ignne(); 2498 } 2499 #endif 2500 } 2501 2502 static void do_fldenv(X86Access *ac, target_ulong ptr, int data32) 2503 { 2504 int i, fpus, fptag; 2505 CPUX86State *env = ac->env; 2506 2507 cpu_set_fpuc(env, access_ldw(ac, ptr)); 2508 fpus = access_ldw(ac, ptr + (2 << data32)); 2509 fptag = access_ldw(ac, ptr + (4 << data32)); 2510 2511 cpu_set_fpus(env, fpus); 2512 for (i = 0; i < 8; i++) { 2513 env->fptags[i] = ((fptag & 3) == 3); 2514 fptag >>= 2; 2515 } 2516 } 2517 2518 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) 2519 { 2520 X86Access ac; 2521 2522 access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC()); 2523 do_fldenv(&ac, ptr, data32); 2524 } 2525 2526 static void do_fsave(X86Access *ac, target_ulong ptr, int data32) 2527 { 2528 CPUX86State *env = ac->env; 2529 2530 do_fstenv(ac, ptr, data32); 2531 ptr += 14 << data32; 2532 2533 for (int i = 0; i < 8; i++) { 2534 floatx80 tmp = ST(i); 2535 do_fstt(ac, ptr, tmp); 2536 ptr += 10; 2537 } 2538 2539 do_fninit(env); 2540 } 2541 2542 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) 2543 { 2544 int size = (14 << data32) + 80; 2545 X86Access ac; 2546 2547 access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, GETPC()); 2548 do_fsave(&ac, ptr, data32); 2549 } 2550 2551 static void do_frstor(X86Access *ac, target_ulong ptr, int data32) 2552 { 2553 CPUX86State *env = ac->env; 2554 2555 do_fldenv(ac, ptr, data32); 2556 ptr += 14 << data32; 2557 2558 for (int i = 0; i < 8; i++) { 2559 floatx80 tmp = do_fldt(ac, ptr); 2560 ST(i) = tmp; 2561 ptr += 10; 2562 } 2563 } 2564 2565 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) 2566 { 2567 int size = (14 << data32) + 80; 2568 X86Access ac; 2569 2570 access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, GETPC()); 2571 do_frstor(&ac, ptr, data32); 2572 } 2573 2574 #define XO(X) offsetof(X86XSaveArea, X) 2575 2576 static void do_xsave_fpu(X86Access *ac, target_ulong ptr) 2577 { 2578 CPUX86State 
                *env = ac->env;
    int fpus, fptag, i;
    target_ulong addr;

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    /* XSAVE uses a compressed 1-bit-per-register tag word. */
    fptag = 0;
    for (i = 0; i < 8; i++) {
        fptag |= (env->fptags[i] << i);
    }

    access_stw(ac, ptr + XO(legacy.fcw), env->fpuc);
    access_stw(ac, ptr + XO(legacy.fsw), fpus);
    access_stw(ac, ptr + XO(legacy.ftw), fptag ^ 0xff);

    /* In 32-bit mode this is eip, sel, dp, sel.
       In 64-bit mode this is rip, rdp.
       But in either case we don't write actual data, just zeros.  */
    access_stq(ac, ptr + XO(legacy.fpip), 0); /* eip+sel; rip */
    access_stq(ac, ptr + XO(legacy.fpdp), 0); /* edp+sel; rdp */

    addr = ptr + XO(legacy.fpregs);

    for (i = 0; i < 8; i++) {
        floatx80 tmp = ST(i);
        do_fstt(ac, addr, tmp);
        addr += 16;
    }
}

/* Store MXCSR and its mask into the legacy region of an XSAVE area. */
static void do_xsave_mxcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    update_mxcsr_from_sse_status(env);
    access_stl(ac, ptr + XO(legacy.mxcsr), env->mxcsr);
    access_stl(ac, ptr + XO(legacy.mxcsr_mask), 0x0000ffff);
}

/* Store the low 128 bits of each XMM register (8 or 16 in 64-bit mode). */
static void do_xsave_sse(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + XO(legacy.xmm_regs);
    for (i = 0; i < nb_xmm_regs; i++) {
        access_stq(ac, addr, env->xmm_regs[i].ZMM_Q(0));
        access_stq(ac, addr + 8, env->xmm_regs[i].ZMM_Q(1));
        addr += 16;
    }
}

/* Store the high halves of the YMM registers (AVX state component). */
static void do_xsave_ymmh(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, nb_xmm_regs;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
        access_stq(ac, ptr, env->xmm_regs[i].ZMM_Q(2));
        access_stq(ac, ptr + 8, env->xmm_regs[i].ZMM_Q(3));
    }
}

/* Store the four MPX bound registers. */
static void do_xsave_bndregs(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        access_stq(ac, addr, env->bnd_regs[i].lb);
        access_stq(ac, addr + 8, env->bnd_regs[i].ub);
    }
}

/* Store the MPX configuration and status registers. */
static void do_xsave_bndcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
               env->bndcs_regs.cfgu);
    access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
               env->bndcs_regs.sts);
}

/* Store the protection-key rights register. */
static void do_xsave_pkru(X86Access *ac, target_ulong ptr)
{
    access_stq(ac, ptr, ac->env->pkru);
}

/* FXSAVE: x87 state always; MXCSR/XMM only when OSFXSR is enabled. */
static void do_fxsave(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    do_xsave_fpu(ac, ptr);
    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xsave_mxcsr(ac, ptr);
        /* Fast FXSAVE leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xsave_sse(ac, ptr);
        }
    }
}

void helper_fxsave(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();
    X86Access ac;

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
                   MMU_DATA_STORE, ra);
    do_fxsave(&ac, ptr);
}

static uint64_t get_xinuse(CPUX86State *env)
{
    uint64_t inuse = -1;

    /* For the most part, we don't track XINUSE.  We could calculate it
       here for all components, but it's probably less work to simply
       indicate in use.  That said, the state of BNDREGS is important
       enough to track in HFLAGS, so we might as well use that here.  */
    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
        inuse &= ~XSTATE_BNDREGS_MASK;
    }
    return inuse;
}

/*
 * Write the state components selected by opt (already masked by rfbm)
 * and update XSTATE_BV for all components in rfbm.
 */
static void do_xsave_access(X86Access *ac, target_ulong ptr, uint64_t rfbm,
                            uint64_t inuse, uint64_t opt)
{
    uint64_t old_bv, new_bv;

    if (opt & XSTATE_FP_MASK) {
        do_xsave_fpu(ac, ptr);
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
        do_xsave_mxcsr(ac, ptr);
    }
    if (opt & XSTATE_SSE_MASK) {
        do_xsave_sse(ac, ptr);
    }
    if (opt & XSTATE_YMM_MASK) {
        do_xsave_ymmh(ac, ptr + XO(avx_state));
    }
    if (opt & XSTATE_BNDREGS_MASK) {
        do_xsave_bndregs(ac, ptr + XO(bndreg_state));
    }
    if (opt & XSTATE_BNDCSR_MASK) {
        do_xsave_bndcsr(ac, ptr + XO(bndcsr_state));
    }
    if (opt & XSTATE_PKRU_MASK) {
        do_xsave_pkru(ac, ptr + XO(pkru_state));
    }

    /* Update the XSTATE_BV field.  */
    old_bv = access_ldq(ac, ptr + XO(header.xstate_bv));
    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
    access_stq(ac, ptr + XO(header.xstate_bv), new_bv);
}

/* Common XSAVE/XRSTOR legality checks (#UD and #GP conditions). */
static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }
}

static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
                     uint64_t inuse, uint64_t opt, uintptr_t ra)
{
    X86Access ac;
    unsigned size;

    do_xsave_chk(env, ptr, ra);

    /* Never save anything not enabled by XCR0.
     */
    rfbm &= env->xcr0;
    opt &= rfbm;
    size = xsave_area_size(opt, false);

    access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra);
    do_xsave_access(&ac, ptr, rfbm, inuse, opt);
}

void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    do_xsave(env, ptr, rfbm, get_xinuse(env), rfbm, GETPC());
}

void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    /* XSAVEOPT may skip components that are not in use. */
    uint64_t inuse = get_xinuse(env);
    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
}

/* Load the x87 state from the legacy region of an XSAVE area. */
static void do_xrstor_fpu(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, fpuc, fpus, fptag;
    target_ulong addr;

    fpuc = access_ldw(ac, ptr + XO(legacy.fcw));
    fpus = access_ldw(ac, ptr + XO(legacy.fsw));
    fptag = access_ldw(ac, ptr + XO(legacy.ftw));
    cpu_set_fpuc(env, fpuc);
    cpu_set_fpus(env, fpus);

    /* The saved tag word is inverted: 1 bit means valid. */
    fptag ^= 0xff;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag >> i) & 1);
    }

    addr = ptr + XO(legacy.fpregs);

    for (i = 0; i < 8; i++) {
        floatx80 tmp = do_fldt(ac, addr);
        ST(i) = tmp;
        addr += 16;
    }
}

/* Load MXCSR from the legacy region of an XSAVE area. */
static void do_xrstor_mxcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    cpu_set_mxcsr(env, access_ldl(ac, ptr + XO(legacy.mxcsr)));
}

/* Load the low 128 bits of each XMM register. */
static void do_xrstor_sse(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + XO(legacy.xmm_regs);
    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(0) = access_ldq(ac, addr);
        env->xmm_regs[i].ZMM_Q(1) = access_ldq(ac, addr + 8);
        addr += 16;
    }
}

/* Reset the low 128 bits of each XMM register to the init state. */
static void do_clear_sse(CPUX86State *env)
{
    int i, nb_xmm_regs;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(0) = 0;
        env->xmm_regs[i].ZMM_Q(1) = 0;
    }
}

/* Load the high halves of the YMM registers (AVX state component). */
static void do_xrstor_ymmh(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, nb_xmm_regs;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
        env->xmm_regs[i].ZMM_Q(2) = access_ldq(ac, ptr);
        env->xmm_regs[i].ZMM_Q(3) = access_ldq(ac, ptr + 8);
    }
}

/* Reset the high halves of the YMM registers to the init state. */
static void do_clear_ymmh(CPUX86State *env)
{
    int i, nb_xmm_regs;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(2) = 0;
        env->xmm_regs[i].ZMM_Q(3) = 0;
    }
}

/* Load the four MPX bound registers. */
static void do_xrstor_bndregs(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        env->bnd_regs[i].lb = access_ldq(ac, addr);
        env->bnd_regs[i].ub = access_ldq(ac, addr + 8);
    }
}

/* Load the MPX configuration and status registers. */
static void do_xrstor_bndcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    /* FIXME: Extend highest implemented bit of linear address.  */
    env->bndcs_regs.cfgu
        = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu));
    env->bndcs_regs.sts
        = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts));
}

/* Load the protection-key rights register. */
static void do_xrstor_pkru(X86Access *ac, target_ulong ptr)
{
    ac->env->pkru = access_ldq(ac, ptr);
}

/* FXRSTOR: x87 state always; MXCSR/XMM only when OSFXSR is enabled. */
static void do_fxrstor(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    do_xrstor_fpu(ac, ptr);
    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xrstor_mxcsr(ac, ptr);
        /* Fast FXRSTOR leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xrstor_sse(ac, ptr);
        }
    }
}

void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();
    X86Access ac;

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
                   MMU_DATA_LOAD, ra);
    do_fxrstor(&ac, ptr);
}

/*
 * Validate the XSAVE header for XRSTOR; returns false (caller raises
 * #GP) for compact form or reserved bits, and stores XSTATE_BV.
 */
static bool valid_xrstor_header(X86Access *ac, uint64_t *pxsbv,
                                target_ulong ptr)
{
    uint64_t xstate_bv, xcomp_bv, reserve0;

    xstate_bv = access_ldq(ac, ptr + XO(header.xstate_bv));
    xcomp_bv = access_ldq(ac, ptr + XO(header.xcomp_bv));
    reserve0 = access_ldq(ac, ptr + XO(header.reserve0));
    *pxsbv = xstate_bv;

    /*
     * XCOMP_BV bit 63 indicates compact form, which we do not support,
     * and thus must raise #GP.  That leaves us in standard form.
     * In standard form, bytes 23:8 must be zero -- which is both
     * XCOMP_BV and the following 64-bit field.
     */
    if (xcomp_bv || reserve0) {
        return false;
    }

    /* The XSTATE_BV field must not set bits not present in XCR0.
*/ 2981 return (xstate_bv & ~ac->env->xcr0) == 0; 2982 } 2983 2984 static void do_xrstor(X86Access *ac, target_ulong ptr, 2985 uint64_t rfbm, uint64_t xstate_bv) 2986 { 2987 CPUX86State *env = ac->env; 2988 2989 if (rfbm & XSTATE_FP_MASK) { 2990 if (xstate_bv & XSTATE_FP_MASK) { 2991 do_xrstor_fpu(ac, ptr); 2992 } else { 2993 do_fninit(env); 2994 memset(env->fpregs, 0, sizeof(env->fpregs)); 2995 } 2996 } 2997 if (rfbm & XSTATE_SSE_MASK) { 2998 /* Note that the standard form of XRSTOR loads MXCSR from memory 2999 whether or not the XSTATE_BV bit is set. */ 3000 do_xrstor_mxcsr(ac, ptr); 3001 if (xstate_bv & XSTATE_SSE_MASK) { 3002 do_xrstor_sse(ac, ptr); 3003 } else { 3004 do_clear_sse(env); 3005 } 3006 } 3007 if (rfbm & XSTATE_YMM_MASK) { 3008 if (xstate_bv & XSTATE_YMM_MASK) { 3009 do_xrstor_ymmh(ac, ptr + XO(avx_state)); 3010 } else { 3011 do_clear_ymmh(env); 3012 } 3013 } 3014 if (rfbm & XSTATE_BNDREGS_MASK) { 3015 if (xstate_bv & XSTATE_BNDREGS_MASK) { 3016 do_xrstor_bndregs(ac, ptr + XO(bndreg_state)); 3017 env->hflags |= HF_MPX_IU_MASK; 3018 } else { 3019 memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); 3020 env->hflags &= ~HF_MPX_IU_MASK; 3021 } 3022 } 3023 if (rfbm & XSTATE_BNDCSR_MASK) { 3024 if (xstate_bv & XSTATE_BNDCSR_MASK) { 3025 do_xrstor_bndcsr(ac, ptr + XO(bndcsr_state)); 3026 } else { 3027 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); 3028 } 3029 cpu_sync_bndcs_hflags(env); 3030 } 3031 if (rfbm & XSTATE_PKRU_MASK) { 3032 uint64_t old_pkru = env->pkru; 3033 if (xstate_bv & XSTATE_PKRU_MASK) { 3034 do_xrstor_pkru(ac, ptr + XO(pkru_state)); 3035 } else { 3036 env->pkru = 0; 3037 } 3038 if (env->pkru != old_pkru) { 3039 CPUState *cs = env_cpu(env); 3040 tlb_flush(cs); 3041 } 3042 } 3043 } 3044 3045 #undef XO 3046 3047 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 3048 { 3049 uintptr_t ra = GETPC(); 3050 X86Access ac; 3051 uint64_t xstate_bv; 3052 unsigned size, size_ext; 3053 3054 do_xsave_chk(env, ptr, ra); 3055 3056 
/* Begin with just the minimum size to validate the header. */ 3057 size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader); 3058 access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra); 3059 if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) { 3060 raise_exception_ra(env, EXCP0D_GPF, ra); 3061 } 3062 3063 rfbm &= env->xcr0; 3064 size_ext = xsave_area_size(rfbm & xstate_bv, false); 3065 if (size < size_ext) { 3066 /* TODO: See if existing page probe has covered extra size. */ 3067 access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra); 3068 } 3069 3070 do_xrstor(&ac, ptr, rfbm, xstate_bv); 3071 } 3072 3073 #if defined(CONFIG_USER_ONLY) 3074 void cpu_x86_fsave(CPUX86State *env, void *host, size_t len) 3075 { 3076 X86Access ac = { 3077 .haddr1 = host, 3078 .size = 4 * 7 + 8 * 10, 3079 .env = env, 3080 }; 3081 3082 assert(ac.size <= len); 3083 do_fsave(&ac, 0, true); 3084 } 3085 3086 void cpu_x86_frstor(CPUX86State *env, void *host, size_t len) 3087 { 3088 X86Access ac = { 3089 .haddr1 = host, 3090 .size = 4 * 7 + 8 * 10, 3091 .env = env, 3092 }; 3093 3094 assert(ac.size <= len); 3095 do_frstor(&ac, 0, true); 3096 } 3097 3098 void cpu_x86_fxsave(CPUX86State *env, void *host, size_t len) 3099 { 3100 X86Access ac = { 3101 .haddr1 = host, 3102 .size = sizeof(X86LegacyXSaveArea), 3103 .env = env, 3104 }; 3105 3106 assert(ac.size <= len); 3107 do_fxsave(&ac, 0); 3108 } 3109 3110 void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len) 3111 { 3112 X86Access ac = { 3113 .haddr1 = host, 3114 .size = sizeof(X86LegacyXSaveArea), 3115 .env = env, 3116 }; 3117 3118 assert(ac.size <= len); 3119 do_fxrstor(&ac, 0); 3120 } 3121 3122 void cpu_x86_xsave(CPUX86State *env, void *host, size_t len, uint64_t rfbm) 3123 { 3124 X86Access ac = { 3125 .haddr1 = host, 3126 .env = env, 3127 }; 3128 3129 /* 3130 * Since this is only called from user-level signal handling, 3131 * we should have done the job correctly there. 
3132 */ 3133 assert((rfbm & ~env->xcr0) == 0); 3134 ac.size = xsave_area_size(rfbm, false); 3135 assert(ac.size <= len); 3136 do_xsave_access(&ac, 0, rfbm, get_xinuse(env), rfbm); 3137 } 3138 3139 bool cpu_x86_xrstor(CPUX86State *env, void *host, size_t len, uint64_t rfbm) 3140 { 3141 X86Access ac = { 3142 .haddr1 = host, 3143 .env = env, 3144 }; 3145 uint64_t xstate_bv; 3146 3147 /* 3148 * Since this is only called from user-level signal handling, 3149 * we should have done the job correctly there. 3150 */ 3151 assert((rfbm & ~env->xcr0) == 0); 3152 ac.size = xsave_area_size(rfbm, false); 3153 assert(ac.size <= len); 3154 3155 if (!valid_xrstor_header(&ac, &xstate_bv, 0)) { 3156 return false; 3157 } 3158 do_xrstor(&ac, 0, rfbm, xstate_bv); 3159 return true; 3160 } 3161 #endif 3162 3163 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) 3164 { 3165 /* The OS must have enabled XSAVE. */ 3166 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 3167 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 3168 } 3169 3170 switch (ecx) { 3171 case 0: 3172 return env->xcr0; 3173 case 1: 3174 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) { 3175 return env->xcr0 & get_xinuse(env); 3176 } 3177 break; 3178 } 3179 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 3180 } 3181 3182 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) 3183 { 3184 uint32_t dummy, ena_lo, ena_hi; 3185 uint64_t ena; 3186 3187 /* The OS must have enabled XSAVE. */ 3188 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 3189 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 3190 } 3191 3192 /* Only XCR0 is defined at present; the FPU may not be disabled. */ 3193 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) { 3194 goto do_gpf; 3195 } 3196 3197 /* SSE can be disabled, but only if AVX is disabled too. */ 3198 if ((mask & (XSTATE_SSE_MASK | XSTATE_YMM_MASK)) == XSTATE_YMM_MASK) { 3199 goto do_gpf; 3200 } 3201 3202 /* Disallow enabling unimplemented features. 
*/ 3203 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); 3204 ena = ((uint64_t)ena_hi << 32) | ena_lo; 3205 if (mask & ~ena) { 3206 goto do_gpf; 3207 } 3208 3209 /* Disallow enabling only half of MPX. */ 3210 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK))) 3211 & XSTATE_BNDCSR_MASK) { 3212 goto do_gpf; 3213 } 3214 3215 env->xcr0 = mask; 3216 cpu_sync_bndcs_hflags(env); 3217 cpu_sync_avx_hflag(env); 3218 return; 3219 3220 do_gpf: 3221 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 3222 } 3223 3224 /* MMX/SSE */ 3225 /* XXX: optimize by storing fptt and fptags in the static cpu state */ 3226 3227 #define SSE_DAZ 0x0040 3228 #define SSE_RC_SHIFT 13 3229 #define SSE_RC_MASK (3 << SSE_RC_SHIFT) 3230 #define SSE_FZ 0x8000 3231 3232 void update_mxcsr_status(CPUX86State *env) 3233 { 3234 uint32_t mxcsr = env->mxcsr; 3235 int rnd_type; 3236 3237 /* set rounding mode */ 3238 rnd_type = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT; 3239 set_x86_rounding_mode(rnd_type, &env->sse_status); 3240 3241 /* Set exception flags. */ 3242 set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | 3243 (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | 3244 (mxcsr & FPUS_OE ? float_flag_overflow : 0) | 3245 (mxcsr & FPUS_UE ? float_flag_underflow : 0) | 3246 (mxcsr & FPUS_PE ? float_flag_inexact : 0), 3247 &env->sse_status); 3248 3249 /* set denormals are zero */ 3250 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); 3251 3252 /* set flush to zero */ 3253 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status); 3254 } 3255 3256 void update_mxcsr_from_sse_status(CPUX86State *env) 3257 { 3258 uint8_t flags = get_float_exception_flags(&env->sse_status); 3259 /* 3260 * The MXCSR denormal flag has opposite semantics to 3261 * float_flag_input_denormal (the softfloat code sets that flag 3262 * only when flushing input denormals to zero, but SSE sets it 3263 * only when not flushing them to zero), so is not converted 3264 * here. 
3265 */ 3266 env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | 3267 (flags & float_flag_divbyzero ? FPUS_ZE : 0) | 3268 (flags & float_flag_overflow ? FPUS_OE : 0) | 3269 (flags & float_flag_underflow ? FPUS_UE : 0) | 3270 (flags & float_flag_inexact ? FPUS_PE : 0) | 3271 (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE : 3272 0)); 3273 } 3274 3275 void helper_update_mxcsr(CPUX86State *env) 3276 { 3277 update_mxcsr_from_sse_status(env); 3278 } 3279 3280 void helper_ldmxcsr(CPUX86State *env, uint32_t val) 3281 { 3282 cpu_set_mxcsr(env, val); 3283 } 3284 3285 void helper_enter_mmx(CPUX86State *env) 3286 { 3287 env->fpstt = 0; 3288 *(uint32_t *)(env->fptags) = 0; 3289 *(uint32_t *)(env->fptags + 4) = 0; 3290 } 3291 3292 void helper_emms(CPUX86State *env) 3293 { 3294 /* set to empty state */ 3295 *(uint32_t *)(env->fptags) = 0x01010101; 3296 *(uint32_t *)(env->fptags + 4) = 0x01010101; 3297 } 3298 3299 #define SHIFT 0 3300 #include "ops_sse.h" 3301 3302 #define SHIFT 1 3303 #include "ops_sse.h" 3304 3305 #define SHIFT 2 3306 #include "ops_sse.h" 3307