/*
 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include <math.h>
#include "cpu.h"
#include "tcg-cpu.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "fpu/softfloat-macros.h"
#include "helper-tcg.h"
#include "access.h"

/* float macros: FT0 is the scratch operand, ST(n) indexes the x87 stack */
#define FT0 (env->ft0)
#define ST0 (env->fpregs[env->fpstt].d)
#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d)
#define ST1 ST(1)

/* FPU control word rounding-control field (bits 11:10) */
#define FPU_RC_SHIFT 10
#define FPU_RC_MASK (3 << FPU_RC_SHIFT)
#define FPU_RC_NEAR 0x000
#define FPU_RC_DOWN 0x400
#define FPU_RC_UP 0x800
#define FPU_RC_CHOP 0xc00

#define MAXTAN 9223372036854775808.0

/* the following deal with x86 long double-precision numbers */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp) (fp.l.upper & 0x7fff)
#define SIGND(fp) ((fp.l.upper) & 0x8000)
#define MANTD(fp) (fp.l.lower)
#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS

/* FPU status word exception and summary bits */
#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B (1 << 15)

/* FPU control word: mask bits for the six exception classes */
#define FPUC_EM 0x3f

/*
 * floatx80 transcendental constants; the _d / _u variants are the
 * values rounded down / up by one ulp, selected per rounding mode.
 */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)

/* Push: decrement the top-of-stack pointer and mark the new slot valid. */
static inline void fpush(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fptags[env->fpstt] = 0; /* validate stack entry */
}

/* Pop: mark the current top slot empty and increment the pointer. */
static inline void fpop(CPUX86State *env)
{
    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
    env->fpstt = (env->fpstt + 1) & 7;
}

/* Load an 80-bit extended double from guest memory (8+2 byte layout). */
static floatx80 do_fldt(X86Access *ac, target_ulong ptr)
{
    CPU_LDoubleU temp;

    temp.l.lower = access_ldq(ac, ptr);
    temp.l.upper = access_ldw(ac, ptr + 8);
    return temp.d;
}

/* Store an 80-bit extended double to guest memory (8+2 byte layout). */
static void do_fstt(X86Access *ac, target_ulong ptr, floatx80 f)
{
    CPU_LDoubleU temp;

    temp.d = f;
    access_stq(ac, ptr, temp.l.lower);
    access_stw(ac, ptr + 8, temp.l.upper);
}

/* x87 FPU helpers */

/* Convert floatx80 to host double, going via float64 bits. */
static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.f64 = floatx80_to_float64(a, &env->fp_status);
    return u.d;
}

/* Convert host double to floatx80, going via float64 bits. */
static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.d = a;
    return float64_to_floatx80(u.f64, &env->fp_status);
}

/*
 * Set exception bits in the FPU status word; if any pending exception
 * is unmasked in the control word, also raise the summary/busy flags.
 */
static void fpu_set_exception(CPUX86State *env, int mask)
{
    env->fpus |= mask;
    if (env->fpus & (~env->fpuc & FPUC_EM)) {
        env->fpus |= FPUS_SE | FPUS_B;
    }
}

void cpu_init_fp_statuses(CPUX86State *env)
{
    /*
     * Initialise the non-runtime-varying fields of the various
     * float_status words to x86 behaviour. This must be called at
     * CPU reset because the float_status words are in the
     * "zeroed on reset" portion of the CPU state struct.
     * Fields in float_status that vary under guest control are set
     * via the codepath for setting that register, eg cpu_set_fpuc().
     */
    /*
     * Use x87 NaN propagation rules:
     * SNaN + QNaN => return the QNaN
     * two SNaNs => return the one with the larger significand, silenced
     * two QNaNs => return the one with the larger significand
     * SNaN and a non-NaN => return the SNaN, silenced
     * QNaN and a non-NaN => return the QNaN
     *
     * If we get down to comparing significands and they are the same,
     * return the NaN with the positive sign bit (if any).
     */
    set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
    /*
     * TODO: These are incorrect: the x86 Software Developer's Manual vol 1
     * section 4.8.3.5 "Operating on SNaNs and QNaNs" says that the
     * "larger significand" behaviour is only used for x87 FPU operations.
     * For SSE the required behaviour is to always return the first NaN,
     * which is float_2nan_prop_ab.
     *
     * mmx_status is used only for the AMD 3DNow! instructions, which
     * are documented in the "3DNow! Technology Manual" as not supporting
     * NaNs or infinities as inputs. The result of passing two NaNs is
     * documented as "undefined", so we can do what we choose.
     * (Strictly there is some behaviour we don't implement correctly
     * for these "unsupported" NaN and Inf values, like "NaN * 0 == 0".)
     */
    set_float_2nan_prop_rule(float_2nan_prop_x87, &env->mmx_status);
    set_float_2nan_prop_rule(float_2nan_prop_x87, &env->sse_status);
    /*
     * Only SSE has multiply-add instructions. In the SDM Section 14.5.2
     * "Fused-Multiply-ADD (FMA) Numeric Behavior" the NaN handling is
     * specified -- for 0 * inf + NaN the input NaN is selected, and if
     * there are multiple input NaNs they are selected in the order a, b, c.
     * We also do not raise Invalid for the 0 * inf + (Q)NaN case.
     */
    set_float_infzeronan_rule(float_infzeronan_dnan_never |
                              float_infzeronan_suppress_invalid,
                              &env->sse_status);
    set_float_3nan_prop_rule(float_3nan_prop_abc, &env->sse_status);
    /* Default NaN: sign bit set, most significant frac bit set */
    set_float_default_nan_pattern(0b11000000, &env->fp_status);
    set_float_default_nan_pattern(0b11000000, &env->mmx_status);
    set_float_default_nan_pattern(0b11000000, &env->sse_status);
}

/* Clear the softfloat exception flags, returning the previous set. */
static inline uint8_t save_exception_flags(CPUX86State *env)
{
    uint8_t old_flags = get_float_exception_flags(&env->fp_status);
    set_float_exception_flags(0, &env->fp_status);
    return old_flags;
}

/*
 * Fold the softfloat exceptions raised since the matching
 * save_exception_flags() call into the x87 status word, and restore
 * the previously saved softfloat flags.
 */
static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
{
    uint8_t new_flags = get_float_exception_flags(&env->fp_status);
    float_raise(old_flags, &env->fp_status);
    fpu_set_exception(env,
                      ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
                       (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
                       (new_flags & float_flag_overflow ? FPUS_OE : 0) |
                       (new_flags & float_flag_underflow ? FPUS_UE : 0) |
                       (new_flags & float_flag_inexact ? FPUS_PE : 0) |
                       (new_flags & float_flag_input_denormal_flushed ?
                        FPUS_DE : 0)));
}

/* floatx80 division with x87 exception reporting. */
static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
{
    uint8_t old_flags = save_exception_flags(env);
    floatx80 ret = floatx80_div(a, b, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return ret;
}

/*
 * Deliver a pending x87 exception: as #MF when CR0.NE is set,
 * otherwise via the legacy FERR# interrupt path (system emulation only).
 */
static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
{
    if (env->cr[0] & CR0_NE_MASK) {
        raise_exception_ra(env, EXCP10_COPR, retaddr);
    }
#if !defined(CONFIG_USER_ONLY)
    else {
        fpu_check_raise_ferr_irq(env);
    }
#endif
}

/* Load a 32-bit float (raw bits in val) into FT0. */
void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.i = val;
    FT0 = float32_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* Load a 64-bit double (raw bits in val) into FT0. */
void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.i = val;
    FT0 = float64_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* Load a signed 32-bit integer into FT0; int32 -> floatx80 is exact. */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}

/* Push a 32-bit float (raw bits in val) onto the stack as ST0. */
void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float32 f;
        uint32_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

/* Push a 64-bit double (raw bits in val) onto the stack as ST0. */
void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float64 f;
        uint64_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

/* Temporarily force full 64-bit-significand precision; returns the old. */
static FloatX80RoundPrec tmp_maximise_precision(float_status *st)
{
    FloatX80RoundPrec old = get_floatx80_rounding_precision(st);
    set_floatx80_rounding_precision(floatx80_precision_x, st);
    return old;
}

/* Push a signed 32-bit integer onto the stack as ST0 (exact). */
void helper_fildl_ST0(CPUX86State *env, int32_t val)
{
    int new_fpstt;
    FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */

    set_floatx80_rounding_precision(old, &env->fp_status);
}

/* Push a signed 64-bit integer onto the stack as ST0 (exact). */
void helper_fildll_ST0(CPUX86State *env, int64_t val)
{
    int new_fpstt;
    FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */

    set_floatx80_rounding_precision(old, &env->fp_status);
}

/* Return ST0 converted to 32-bit float bits. */
uint32_t helper_fsts_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.f = floatx80_to_float32(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}

/* Return ST0 converted to 64-bit double bits. */
uint64_t helper_fstl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.f = floatx80_to_float64(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}

/* FIST (16-bit): out-of-range results become the integer indefinite. */
int32_t helper_fist_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FIST (32-bit): invalid conversions become the integer indefinite. */
int32_t helper_fistl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FIST (64-bit): invalid conversions become the integer indefinite. */
int64_t helper_fistll_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTT (16-bit): always truncates regardless of rounding mode. */
int32_t helper_fistt_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTT (32-bit): always truncates regardless of rounding mode. */
int32_t helper_fisttl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTT (64-bit): always truncates regardless of rounding mode. */
int64_t helper_fisttll_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* Push an 80-bit extended double loaded from guest memory. */
void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
    int new_fpstt;
    X86Access ac;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = do_fldt(&ac, ptr);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* Store ST0 as an 80-bit extended double to guest memory. */
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
    do_fstt(&ac, ptr, ST0);
}

void helper_fpush(CPUX86State *env)
{
    fpush(env);
}

void helper_fpop(CPUX86State *env)
{
    fpop(env);
}

/* FDECSTP: rotate the stack pointer down; clears C0-C3. */
void helper_fdecstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fpus &= ~0x4700;
}

/* FINCSTP: rotate the stack pointer up; clears C0-C3. */
void helper_fincstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt + 1) & 7;
    env->fpus &= ~0x4700;
}

/* FPU move */

/* FFREE: mark ST(st_index) as empty in the tag word. */
void helper_ffree_STN(CPUX86State *env, int st_index)
{
    env->fptags[(env->fpstt + st_index) & 7] = 1;
}

void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}

void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}

void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}

void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}

void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 tmp;

    tmp = ST(st_index);
    ST(st_index) = ST0;
    ST0 = tmp;
}

/* FPU operations */

/* C3,C2,C0 encodings indexed by FloatRelation + 1 (less/equal/greater/unordered) */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};

/* FCOM: signalling compare of ST0 with FT0; result in C3,C2,C0. */
void helper_fcom_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

/* FUCOM: quiet compare (no Invalid for QNaN operands). */
void helper_fucom_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

/* EFLAGS encodings indexed by FloatRelation + 1 */
static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};

/* FCOMI: signalling compare with result in ZF,PF,CF. */
void helper_fcomi_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C);
    CC_SRC = eflags | fcomi_ccval[ret + 1];
    CC_OP = CC_OP_EFLAGS;
    merge_exception_flags(env, old_flags);
}

/* FUCOMI: quiet compare with result in ZF,PF,CF. */
void helper_fucomi_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C);
    CC_SRC = eflags | fcomi_ccval[ret + 1];
    CC_OP = CC_OP_EFLAGS;
    merge_exception_flags(env, old_flags);
}

void helper_fadd_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fmul_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsub_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* Reverse subtract: ST0 = FT0 - ST0. */
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}

/* Reverse divide: ST0 = FT0 / ST0. */
void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}

/* fp operations between STN and ST0 */

void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* Reverse subtract: ST(n) = ST0 - ST(n). */
void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, *p, ST0);
}

/* Reverse divide: ST(n) = ST0 / ST(n). */
void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, ST0, *p);
}

/* misc FPU operations */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}

void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}

void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}

/* FLDL2T: log2(10), rounded according to the current rounding mode. */
void helper_fldl2t_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_UP:
        ST0 = floatx80_l2t_u;
        break;
    default:
        ST0 = floatx80_l2t;
        break;
    }
}

/* FLDL2E: log2(e), rounded according to the current rounding mode. */
void helper_fldl2e_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_l2e_d;
        break;
    default:
        ST0 = floatx80_l2e;
        break;
    }
}

/* FLDPI: pi, rounded according to the current rounding mode. */
void helper_fldpi_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_pi_d;
        break;
    default:
        ST0 = floatx80_pi;
        break;
    }
}

/* FLDLG2: log10(2), rounded according to the current rounding mode. */
void helper_fldlg2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_lg2_d;
        break;
    default:
        ST0 = floatx80_lg2;
        break;
    }
}

/* FLDLN2: ln(2), rounded according to the current rounding mode. */
void helper_fldln2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_ln2_d;
        break;
    default:
        ST0 = floatx80_ln2;
        break;
    }
}

void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}

void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}

/* FNSTSW: status word with the current TOP field merged into bits 13:11. */
uint32_t helper_fnstsw(CPUX86State *env)
{
    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
}

uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}

/* Map the 2-bit x87/MXCSR rounding-control value to a softfloat mode. */
static void set_x86_rounding_mode(unsigned mode, float_status *status)
{
    static FloatRoundMode x86_round_mode[4] = {
        float_round_nearest_even,
        float_round_down,
        float_round_up,
        float_round_to_zero
    };
    assert(mode < ARRAY_SIZE(x86_round_mode));
    set_float_rounding_mode(x86_round_mode[mode], status);
}

/* Propagate the FPU control word into the softfloat fp_status. */
void update_fp_status(CPUX86State *env)
{
    int rnd_mode;
    FloatX80RoundPrec rnd_prec;

    /* set rounding mode */
    rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT;
    set_x86_rounding_mode(rnd_mode, &env->fp_status);

    /* set rounding precision from the PC field (FPUC bits 9:8) */
    switch ((env->fpuc >> 8) & 3) {
    case 0:
        rnd_prec = floatx80_precision_s;
        break;
    case 2:
        rnd_prec = floatx80_precision_d;
        break;
    case 3:
    default:
        rnd_prec = floatx80_precision_x;
        break;
    }
    set_floatx80_rounding_precision(rnd_prec, &env->fp_status);
}

void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}

/* FNCLEX: clear exception, summary and busy bits in the status word. */
void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}

/* FWAIT: deliver any pending unmasked FPU exception. */
void helper_fwait(CPUX86State *env)
{
    if (env->fpus & FPUS_SE) {
        fpu_raise_exception(env, GETPC());
    }
}

/* FNINIT: reset control/status/tag words and instruction pointers. */
static void do_fninit(CPUX86State *env)
{
    env->fpus = 0;
    env->fpstt = 0;
    env->fpcs = 0;
    env->fpds = 0;
    env->fpip = 0;
    env->fpdp = 0;
    cpu_set_fpuc(env, 0x37f);
    env->fptags[0] = 1;
    env->fptags[1] = 1;
    env->fptags[2] = 1;
    env->fptags[3] = 1;
    env->fptags[4] = 1;
    env->fptags[5] = 1;
    env->fptags[6] = 1;
    env->fptags[7] = 1;
}

void helper_fninit(CPUX86State *env)
{
    do_fninit(env);
}

/* BCD ops */

/* FBLD: load an 18-digit packed-BCD value (sign in byte 9 bit 7). */
void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;
    floatx80 tmp;
    uint64_t val;
    unsigned int v;
    int i;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());

    val = 0;
    for (i = 8; i >= 0; i--) {
        v = access_ldb(&ac, ptr + i);
        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    }
    tmp = int64_to_floatx80(val, &env->fp_status);
    if (access_ldb(&ac, ptr + 9) & 0x80) {
        tmp = floatx80_chs(tmp);
    }
    fpush(env);
    ST0 = tmp;
}

/* FBSTP: store ST0 as packed BCD; out-of-range stores the BCD indefinite. */
void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
    uint8_t old_flags = save_exception_flags(env);
    int v;
    target_ulong mem_ref, mem_end;
    int64_t val;
    CPU_LDoubleU temp;
    X86Access ac;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
    temp.d = ST0;

    val = floatx80_to_int64(ST0, &env->fp_status);
    mem_ref = ptr;
    if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        while (mem_ref < ptr + 7) {
            access_stb(&ac, mem_ref++, 0);
        }
        access_stb(&ac, mem_ref++, 0xc0);
        access_stb(&ac, mem_ref++, 0xff);
        access_stb(&ac, mem_ref++, 0xff);
        merge_exception_flags(env, old_flags);
        return;
    }
    mem_end = mem_ref + 9;
    if (SIGND(temp)) {
        access_stb(&ac, mem_end, 0x80);
        val = -val;
    } else {
        access_stb(&ac, mem_end, 0x00);
    }
    while (mem_ref < mem_end) {
        if (val == 0) {
            break;
        }
        v = val % 100;
        val = val / 100;
        v = ((v / 10) << 4) | (v % 10);
        access_stb(&ac, mem_ref++, v);
    }
    while (mem_ref < mem_end) {
        access_stb(&ac, mem_ref++, 0);
    }
    merge_exception_flags(env, old_flags);
}

/* 128-bit significand of log(2). */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL

/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
908 */ 909 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL) 910 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL) 911 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL) 912 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL) 913 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL) 914 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL) 915 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL) 916 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL) 917 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL) 918 919 struct f2xm1_data { 920 /* 921 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1 922 * are very close to exact floatx80 values. 923 */ 924 floatx80 t; 925 /* The value of 2^t. */ 926 floatx80 exp2; 927 /* The value of 2^t - 1. */ 928 floatx80 exp2m1; 929 }; 930 931 static const struct f2xm1_data f2xm1_table[65] = { 932 { make_floatx80_init(0xbfff, 0x8000000000000000ULL), 933 make_floatx80_init(0x3ffe, 0x8000000000000000ULL), 934 make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, 935 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), 936 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), 937 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, 938 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), 939 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), 940 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, 941 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), 942 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), 943 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, 944 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), 945 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), 946 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, 947 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), 948 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), 949 make_floatx80_init(0xbffd, 
0xe2b78ce97464fdd4ULL) }, 950 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), 951 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), 952 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, 953 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), 954 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), 955 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, 956 { make_floatx80_init(0xbffe, 0xc000000000006530ULL), 957 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), 958 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, 959 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), 960 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), 961 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, 962 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), 963 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), 964 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, 965 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), 966 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), 967 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, 968 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), 969 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), 970 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, 971 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), 972 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), 973 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, 974 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), 975 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), 976 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, 977 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), 978 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), 979 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, 980 { make_floatx80_init(0xbffe, 0x800000000000227dULL), 981 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), 982 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, 983 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), 984 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), 
985 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, 986 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), 987 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), 988 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, 989 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), 990 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), 991 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, 992 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), 993 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), 994 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, 995 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL), 996 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), 997 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, 998 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), 999 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), 1000 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, 1001 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), 1002 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL), 1003 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, 1004 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), 1005 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), 1006 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, 1007 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), 1008 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), 1009 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, 1010 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), 1011 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), 1012 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, 1013 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), 1014 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), 1015 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, 1016 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), 1017 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), 1018 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, 1019 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL), 1020 
make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), 1021 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, 1022 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), 1023 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), 1024 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, 1025 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), 1026 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), 1027 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, 1028 { floatx80_zero_init, 1029 make_floatx80_init(0x3fff, 0x8000000000000000ULL), 1030 floatx80_zero_init }, 1031 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), 1032 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), 1033 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, 1034 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), 1035 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), 1036 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) }, 1037 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), 1038 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), 1039 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, 1040 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), 1041 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), 1042 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, 1043 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), 1044 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), 1045 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, 1046 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), 1047 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), 1048 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, 1049 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), 1050 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), 1051 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, 1052 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), 1053 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), 1054 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, 1055 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), 
1056 make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL), 1057 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, 1058 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), 1059 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), 1060 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, 1061 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), 1062 make_floatx80_init(0x3fff, 0xa27043030c49370aULL), 1063 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, 1064 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), 1065 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), 1066 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, 1067 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), 1068 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), 1069 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, 1070 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), 1071 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), 1072 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, 1073 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), 1074 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), 1075 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, 1076 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL), 1077 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), 1078 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, 1079 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), 1080 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), 1081 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, 1082 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), 1083 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), 1084 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, 1085 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), 1086 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), 1087 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, 1088 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), 1089 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), 1090 make_floatx80_init(0x3ffe, 
0x8ace5422aa0cab86ULL) }, 1091 { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL), 1092 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), 1093 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, 1094 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), 1095 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), 1096 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, 1097 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), 1098 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), 1099 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, 1100 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), 1101 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), 1102 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, 1103 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), 1104 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), 1105 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, 1106 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), 1107 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL), 1108 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) }, 1109 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL), 1110 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL), 1111 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) }, 1112 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL), 1113 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL), 1114 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) }, 1115 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL), 1116 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL), 1117 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) }, 1118 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL), 1119 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL), 1120 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) }, 1121 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL), 1122 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL), 1123 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) }, 1124 { make_floatx80_init(0x3fff, 0x8000000000000000ULL), 1125 
make_floatx80_init(0x4000, 0x8000000000000000ULL), 1126 make_floatx80_init(0x3fff, 0x8000000000000000ULL) }, 1127 }; 1128 1129 void helper_f2xm1(CPUX86State *env) 1130 { 1131 uint8_t old_flags = save_exception_flags(env); 1132 uint64_t sig = extractFloatx80Frac(ST0); 1133 int32_t exp = extractFloatx80Exp(ST0); 1134 bool sign = extractFloatx80Sign(ST0); 1135 1136 if (floatx80_invalid_encoding(ST0)) { 1137 float_raise(float_flag_invalid, &env->fp_status); 1138 ST0 = floatx80_default_nan(&env->fp_status); 1139 } else if (floatx80_is_any_nan(ST0)) { 1140 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1141 float_raise(float_flag_invalid, &env->fp_status); 1142 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1143 } 1144 } else if (exp > 0x3fff || 1145 (exp == 0x3fff && sig != (0x8000000000000000ULL))) { 1146 /* Out of range for the instruction, treat as invalid. */ 1147 float_raise(float_flag_invalid, &env->fp_status); 1148 ST0 = floatx80_default_nan(&env->fp_status); 1149 } else if (exp == 0x3fff) { 1150 /* Argument 1 or -1, exact result 1 or -0.5. */ 1151 if (sign) { 1152 ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL); 1153 } 1154 } else if (exp < 0x3fb0) { 1155 if (!floatx80_is_zero(ST0)) { 1156 /* 1157 * Multiplying the argument by an extra-precision version 1158 * of log(2) is sufficiently precise. Zero arguments are 1159 * returned unchanged. 1160 */ 1161 uint64_t sig0, sig1, sig2; 1162 if (exp == 0) { 1163 normalizeFloatx80Subnormal(sig, &exp, &sig); 1164 } 1165 mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1, 1166 &sig2); 1167 /* This result is inexact. 
*/ 1168 sig1 |= 1; 1169 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1170 sign, exp, sig0, sig1, 1171 &env->fp_status); 1172 } 1173 } else { 1174 floatx80 tmp, y, accum; 1175 bool asign, bsign; 1176 int32_t n, aexp, bexp; 1177 uint64_t asig0, asig1, asig2, bsig0, bsig1; 1178 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 1179 FloatX80RoundPrec save_prec = 1180 env->fp_status.floatx80_rounding_precision; 1181 env->fp_status.float_rounding_mode = float_round_nearest_even; 1182 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 1183 1184 /* Find the nearest multiple of 1/32 to the argument. */ 1185 tmp = floatx80_scalbn(ST0, 5, &env->fp_status); 1186 n = 32 + floatx80_to_int32(tmp, &env->fp_status); 1187 y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status); 1188 1189 if (floatx80_is_zero(y)) { 1190 /* 1191 * Use the value of 2^t - 1 from the table, to avoid 1192 * needing to special-case zero as a result of 1193 * multiplication below. 1194 */ 1195 ST0 = f2xm1_table[n].t; 1196 set_float_exception_flags(float_flag_inexact, &env->fp_status); 1197 env->fp_status.float_rounding_mode = save_mode; 1198 } else { 1199 /* 1200 * Compute the lower parts of a polynomial expansion for 1201 * (2^y - 1) / y. 
1202 */ 1203 accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status); 1204 accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status); 1205 accum = floatx80_mul(accum, y, &env->fp_status); 1206 accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status); 1207 accum = floatx80_mul(accum, y, &env->fp_status); 1208 accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status); 1209 accum = floatx80_mul(accum, y, &env->fp_status); 1210 accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status); 1211 accum = floatx80_mul(accum, y, &env->fp_status); 1212 accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status); 1213 accum = floatx80_mul(accum, y, &env->fp_status); 1214 accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status); 1215 accum = floatx80_mul(accum, y, &env->fp_status); 1216 accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status); 1217 1218 /* 1219 * The full polynomial expansion is f2xm1_coeff_0 + accum 1220 * (where accum has much lower magnitude, and so, in 1221 * particular, carry out of the addition is not possible). 1222 * (This expansion is only accurate to about 70 bits, not 1223 * 128 bits.) 1224 */ 1225 aexp = extractFloatx80Exp(f2xm1_coeff_0); 1226 asign = extractFloatx80Sign(f2xm1_coeff_0); 1227 shift128RightJamming(extractFloatx80Frac(accum), 0, 1228 aexp - extractFloatx80Exp(accum), 1229 &asig0, &asig1); 1230 bsig0 = extractFloatx80Frac(f2xm1_coeff_0); 1231 bsig1 = 0; 1232 if (asign == extractFloatx80Sign(accum)) { 1233 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1234 } else { 1235 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1236 } 1237 /* And thus compute an approximation to 2^y - 1. */ 1238 mul128By64To192(asig0, asig1, extractFloatx80Frac(y), 1239 &asig0, &asig1, &asig2); 1240 aexp += extractFloatx80Exp(y) - 0x3ffe; 1241 asign ^= extractFloatx80Sign(y); 1242 if (n != 32) { 1243 /* 1244 * Multiply this by the precomputed value of 2^t and 1245 * add that of 2^t - 1. 
1246 */ 1247 mul128By64To192(asig0, asig1, 1248 extractFloatx80Frac(f2xm1_table[n].exp2), 1249 &asig0, &asig1, &asig2); 1250 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe; 1251 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1); 1252 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1); 1253 bsig1 = 0; 1254 if (bexp < aexp) { 1255 shift128RightJamming(bsig0, bsig1, aexp - bexp, 1256 &bsig0, &bsig1); 1257 } else if (aexp < bexp) { 1258 shift128RightJamming(asig0, asig1, bexp - aexp, 1259 &asig0, &asig1); 1260 aexp = bexp; 1261 } 1262 /* The sign of 2^t - 1 is always that of the result. */ 1263 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1); 1264 if (asign == bsign) { 1265 /* Avoid possible carry out of the addition. */ 1266 shift128RightJamming(asig0, asig1, 1, 1267 &asig0, &asig1); 1268 shift128RightJamming(bsig0, bsig1, 1, 1269 &bsig0, &bsig1); 1270 ++aexp; 1271 add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1); 1272 } else { 1273 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1274 asign = bsign; 1275 } 1276 } 1277 env->fp_status.float_rounding_mode = save_mode; 1278 /* This result is inexact. */ 1279 asig1 |= 1; 1280 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1281 asign, aexp, asig0, asig1, 1282 &env->fp_status); 1283 } 1284 1285 env->fp_status.floatx80_rounding_precision = save_prec; 1286 } 1287 merge_exception_flags(env, old_flags); 1288 } 1289 1290 void helper_fptan(CPUX86State *env) 1291 { 1292 double fptemp = floatx80_to_double(env, ST0); 1293 1294 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 1295 env->fpus |= 0x400; 1296 } else { 1297 fptemp = tan(fptemp); 1298 ST0 = double_to_floatx80(env, fptemp); 1299 fpush(env); 1300 ST0 = floatx80_one; 1301 env->fpus &= ~0x400; /* C2 <-- 0 */ 1302 /* the above code is for |arg| < 2**52 only */ 1303 } 1304 } 1305 1306 /* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. 
 */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL

/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)

struct fpatan_data {
    /* High and low parts of atan(x). */
    floatx80 atan_high, atan_low;
};

/*
 * Table indexed by n = 0..8; the last entry equals pi/4, consistent
 * with entry n holding atan(n/8) split into high and low parts.
 */
static const struct fpatan_data fpatan_table[9] = {
    { floatx80_zero_init,
      floatx80_zero_init },
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};

/*
 * FPATAN: replace ST1 with atan2(ST1, ST0) and pop the stack.
 * Special cases (NaNs, invalid encodings, zeros, infinities, widely
 * separated exponents) are dispatched up front; the general path does
 * an argument reduction x = t + y with t = n/8, then evaluates
 * arctan(z) for z = y/(1+tx) by polynomial, and finally adds the
 * tabulated arctan(t) and the quadrant adjustment (0, pi/2 or pi).
 */
void helper_fpatan(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
        /* Pass this zero through. */
    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
                arg0_exp - arg1_exp >= 80) &&
               !arg0_sign) {
        /*
         * Dividing ST1 by ST0 gives the correct result up to
         * rounding, and avoids spurious underflow exceptions that
         * might result from passing some small values through the
         * polynomial approximation, but if a finite nonzero result of
         * division is exact, the result of fpatan is still inexact
         * (and underflowing where appropriate).
         */
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
        if (!floatx80_is_zero(ST1) &&
            !(get_float_exception_flags(&env->fp_status) &
              float_flag_inexact)) {
            /*
             * The mathematical result is very slightly closer to zero
             * than this exact result.  Round a value with the
             * significand adjusted accordingly to get the correct
             * exceptions, and possibly an adjusted result depending
             * on the rounding mode.
             */
            uint64_t sig = extractFloatx80Frac(ST1);
            int32_t exp = extractFloatx80Exp(ST1);
            bool sign = extractFloatx80Sign(ST1);
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig - 1,
                                                -1, &env->fp_status);
        }
    } else {
        /* The result is inexact. */
        bool rsign = arg1_sign;
        int32_t rexp;
        uint64_t rsig0, rsig1;
        if (floatx80_is_zero(ST1)) {
            /*
             * ST0 is negative.  The result is pi with the sign of
             * ST1.
             */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else if (floatx80_is_infinity(ST1)) {
            if (floatx80_is_infinity(ST0)) {
                if (arg0_sign) {
                    rexp = pi_34_exp;
                    rsig0 = pi_34_sig_high;
                    rsig1 = pi_34_sig_low;
                } else {
                    rexp = pi_4_exp;
                    rsig0 = pi_4_sig_high;
                    rsig1 = pi_4_sig_low;
                }
            } else {
                rexp = pi_2_exp;
                rsig0 = pi_2_sig_high;
                rsig1 = pi_2_sig_low;
            }
        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
            rexp = pi_2_exp;
            rsig0 = pi_2_sig_high;
            rsig1 = pi_2_sig_low;
        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
            /* ST0 is negative. */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else {
            /*
             * ST0 and ST1 are finite, nonzero and with exponents not
             * too far apart.
             */
            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
            int32_t azexp, axexp;
            bool adj_sub, ysign, zsign;
            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
            uint64_t azsig0, azsig1;
            uint64_t azsig2, azsig3, axsig0, axsig1;
            floatx80 x8;
            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
            FloatX80RoundPrec save_prec =
                env->fp_status.floatx80_rounding_precision;
            /* Intermediate computations run round-to-nearest, full prec. */
            env->fp_status.float_rounding_mode = float_round_nearest_even;
            env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

            if (arg0_exp == 0) {
                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
            }
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            if (arg0_exp > arg1_exp ||
                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
                /* Work with abs(ST1) / abs(ST0).  */
                num_exp = arg1_exp;
                num_sig = arg1_sig;
                den_exp = arg0_exp;
                den_sig = arg0_sig;
                if (arg0_sign) {
                    /* The result is subtracted from pi. */
                    adj_exp = pi_exp;
                    adj_sig0 = pi_sig_high;
                    adj_sig1 = pi_sig_low;
                    adj_sub = true;
                } else {
                    /* The result is used as-is. */
                    adj_exp = 0;
                    adj_sig0 = 0;
                    adj_sig1 = 0;
                    adj_sub = false;
                }
            } else {
                /* Work with abs(ST0) / abs(ST1). */
                num_exp = arg0_exp;
                num_sig = arg0_sig;
                den_exp = arg1_exp;
                den_sig = arg1_sig;
                /* The result is added to or subtracted from pi/2. */
                adj_exp = pi_2_exp;
                adj_sig0 = pi_2_sig_high;
                adj_sig1 = pi_2_sig_low;
                adj_sub = !arg0_sign;
            }

            /*
             * Compute x = num/den, where 0 < x <= 1 and x is not too
             * small.
             */
            xexp = num_exp - den_exp + 0x3ffe;
            remsig0 = num_sig;
            remsig1 = 0;
            if (den_sig <= remsig0) {
                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                ++xexp;
            }
            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
            mul64To128(den_sig, xsig0, &msig0, &msig1);
            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
            while ((int64_t) remsig0 < 0) {
                --xsig0;
                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
            }
            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
            /*
             * No need to correct any estimation error in xsig1; even
             * with such error, it is accurate enough.
             */

            /*
             * Split x as x = t + y, where t = n/8 is the nearest
             * multiple of 1/8 to x.
             */
            x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                               false, xexp + 3, xsig0,
                                               xsig1, &env->fp_status);
            n = floatx80_to_int32(x8, &env->fp_status);
            if (n == 0) {
                ysign = false;
                yexp = xexp;
                ysig0 = xsig0;
                ysig1 = xsig1;
                texp = 0;
                tsig = 0;
            } else {
                int shift = clz32(n) + 32;
                texp = 0x403b - shift;
                tsig = n;
                tsig <<= shift;
                if (texp == xexp) {
                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
                    if ((int64_t) ysig0 >= 0) {
                        ysign = false;
                        if (ysig0 == 0) {
                            if (ysig1 == 0) {
                                yexp = 0;
                            } else {
                                shift = clz64(ysig1) + 64;
                                yexp = xexp - shift;
                                shift128Left(ysig0, ysig1, shift,
                                             &ysig0, &ysig1);
                            }
                        } else {
                            shift = clz64(ysig0);
                            yexp = xexp - shift;
                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                        }
                    } else {
                        ysign = true;
                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
                        if (ysig0 == 0) {
                            shift = clz64(ysig1) + 64;
                        } else {
                            shift = clz64(ysig0);
                        }
                        yexp = xexp - shift;
                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                    }
                } else {
                    /*
                     * t's exponent must be greater than x's because t
                     * is positive and the nearest multiple of 1/8 to
                     * x, and if x has a greater exponent, the power
                     * of 2 with that exponent is also a multiple of
                     * 1/8.
                     */
                    uint64_t usig0, usig1;
                    shift128RightJamming(xsig0, xsig1, texp - xexp,
                                         &usig0, &usig1);
                    ysign = true;
                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
                    if (ysig0 == 0) {
                        shift = clz64(ysig1) + 64;
                    } else {
                        shift = clz64(ysig0);
                    }
                    yexp = texp - shift;
                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                }
            }

            /*
             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
             * arctan(z).
             */
            zsign = ysign;
            if (texp == 0 || yexp == 0) {
                zexp = yexp;
                zsig0 = ysig0;
                zsig1 = ysig1;
            } else {
                /*
                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
                 */
                int32_t dexp = texp + xexp - 0x3ffe;
                uint64_t dsig0, dsig1, dsig2;
                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
                /*
                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
                 * bit).  Add 1 to produce the denominator 1+tx.
                 */
                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
                                     &dsig0, &dsig1);
                dsig0 |= 0x8000000000000000ULL;
                zexp = yexp - 1;
                remsig0 = ysig0;
                remsig1 = ysig1;
                remsig2 = 0;
                if (dsig0 <= remsig0) {
                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                    ++zexp;
                }
                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
                       &remsig0, &remsig1, &remsig2);
                while ((int64_t) remsig0 < 0) {
                    --zsig0;
                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
                           &remsig0, &remsig1, &remsig2);
                }
                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
                /* No need to correct any estimation error in zsig1. */
            }

            if (zexp == 0) {
                azexp = 0;
                azsig0 = 0;
                azsig1 = 0;
            } else {
                floatx80 z2, accum;
                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
                /* Compute z^2. */
                mul128To256(zsig0, zsig1, zsig0, zsig1,
                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
                z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                                   zexp + zexp - 0x3ffe,
                                                   z2sig0, z2sig1,
                                                   &env->fp_status);

                /* Compute the lower parts of the polynomial expansion.  */
                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);

                /*
                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
                 */
                aexp = extractFloatx80Exp(fpatan_coeff_0);
                shift128RightJamming(extractFloatx80Frac(accum), 0,
                                     aexp - extractFloatx80Exp(accum),
                                     &asig0, &asig1);
                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
                       &asig0, &asig1);
                /* Multiply by z to compute arctan(z). */
                azexp = aexp + zexp - 0x3ffe;
                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
                            &azsig2, &azsig3);
            }

            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */
            if (texp == 0) {
                /* z is positive.  */
                axexp = azexp;
                axsig0 = azsig0;
                axsig1 = azsig1;
            } else {
                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
                uint64_t low_sig0 =
                    extractFloatx80Frac(fpatan_table[n].atan_low);
                uint64_t low_sig1 = 0;
                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
                axsig1 = 0;
                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
                                     &low_sig0, &low_sig1);
                if (low_sign) {
                    sub128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                }
                if (azexp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
                                         &axsig0, &axsig1);
                    axexp = azexp + 1;
                    shift128RightJamming(azsig0, azsig1, 1,
                                         &azsig0, &azsig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
                                         &azsig0, &azsig1);
                    ++axexp;
                }
                if (zsign) {
                    sub128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                }
            }

            if (adj_exp == 0) {
                rexp = axexp;
                rsig0 = axsig0;
                rsig1 = axsig1;
            } else {
                /*
                 * Add or subtract arctan(x) (exponent axexp,
                 * significand axsig0 and axsig1, positive, not
                 * necessarily normalized) to the number given by
                 * adj_exp, adj_sig0 and adj_sig1, according to
                 * adj_sub.
                 */
                if (adj_exp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
                                         &axsig0, &axsig1);
                    rexp = adj_exp + 1;
                    shift128RightJamming(adj_sig0, adj_sig1, 1,
                                         &adj_sig0, &adj_sig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(adj_sig0, adj_sig1,
                                         axexp - adj_exp + 1,
                                         &adj_sig0, &adj_sig1);
                    rexp = axexp + 1;
                }
                if (adj_sub) {
                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                } else {
                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                }
            }

            env->fp_status.float_rounding_mode = save_mode;
            env->fp_status.floatx80_rounding_precision = save_prec;
        }
        /* This result is inexact. */
        rsig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
                                            rsig0, rsig1, &env->fp_status);
    }

    fpop(env);
    merge_exception_flags(env, old_flags);
}

/*
 * FXTRACT: split ST0 into exponent and significand, pushing so that
 * ST1 holds the (unbiased) exponent as a floatx80 and ST0 the
 * significand with its exponent rebiased to [1, 2).  Zero produces
 * -inf (via a deliberate division by zero, raising the divide-by-zero
 * flag) with the original zero on top.
 */
void helper_fxtract(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    CPU_LDoubleU temp;

    temp.d = ST0;

    if (floatx80_is_zero(ST0)) {
        /* Easy way to generate -inf and raising division by 0 exception */
        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
                           &env->fp_status);
        fpush(env);
        ST0 = temp.d;
    } else if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_infinity(ST0)) {
        fpush(env);
        ST0 = ST1;
        ST1 = floatx80_infinity;
    } else {
        int expdif;

        if (EXPD(temp) ==
0) {
            /* Subnormal input: normalize to recover the true exponent. */
            int shift = clz64(temp.l.lower);
            temp.l.lower <<= shift;
            expdif = 1 - EXPBIAS - shift;
            float_raise(float_flag_input_denormal_flushed, &env->fp_status);
        } else {
            expdif = EXPD(temp) - EXPBIAS;
        }
        /* DP exponent bias */
        ST0 = int32_to_floatx80(expdif, &env->fp_status);
        fpush(env);
        BIASEXPONENT(temp);
        ST0 = temp.d;
    }
    merge_exception_flags(env, old_flags);
}

/*
 * Shared implementation of FPREM (mod == true, truncating quotient)
 * and FPREM1 (mod == false) -- see helper_fprem/helper_fprem1 below.
 * Sets C0/C3/C1 to the low three quotient bits on completion, or C2
 * when only a partial remainder was computed and the instruction must
 * be re-executed.
 */
static void helper_fprem_common(CPUX86State *env, bool mod)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t quotient;
    CPU_LDoubleU temp0, temp1;
    int exp0, exp1, expdiff;

    temp0.d = ST0;
    temp1.d = ST1;
    exp0 = EXPD(temp0);
    exp1 = EXPD(temp1);

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
        exp0 == 0x7fff || exp1 == 0x7fff ||
        floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
        /* Special operands: let softfloat produce the result/exceptions. */
        ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
    } else {
        /* Recover true exponents for subnormal operands. */
        if (exp0 == 0) {
            exp0 = 1 - clz64(temp0.l.lower);
        }
        if (exp1 == 0) {
            exp1 = 1 - clz64(temp1.l.lower);
        }
        expdiff = exp0 - exp1;
        if (expdiff < 64) {
            ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
            env->fpus |= (quotient & 0x4) << (8 - 2);  /* (C0) <-- q2 */
            env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
            env->fpus |= (quotient & 0x1) << (9 - 0);  /* (C1) <-- q0 */
        } else {
            /*
             * Partial remainder.  This choice of how many bits to
             * process at once is specified in AMD instruction set
             * manuals, and empirically is followed by Intel
             * processors as well; it ensures that the final remainder
             * operation in a loop does produce the correct low three
             * bits of the quotient.  AMD manuals specify that the
             * flags other than C2 are cleared, and empirically Intel
             * processors clear them as well.
             */
            int n = 32 + (expdiff % 32);
            temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
            ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
            env->fpus |= 0x400; /* C2 <-- 1 */
        }
    }
    merge_exception_flags(env, old_flags);
}

/* FPREM1: IEEE remainder (round-to-nearest quotient). */
void helper_fprem1(CPUX86State *env)
{
    helper_fprem_common(env, false);
}

/* FPREM: x87 partial remainder (truncated quotient). */
void helper_fprem(CPUX86State *env)
{
    helper_fprem_common(env, true);
}

/* 128-bit significand of log2(e). */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)

/*
 * Compute an approximation of log2(1+arg), where 1+arg is in the
 * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
 * function is called, rounding precision is set to 80 and the
 * round-to-nearest mode is in effect.
arg must not be exactly zero,
 * and must not be so close to zero that underflow might occur.
 *
 * The result is returned as an exponent (*exp) and a 128-bit
 * significand (*sig0 high, *sig1 low); the caller rounds and packs it.
 */
static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
                                uint64_t *sig0, uint64_t *sig1)
{
    uint64_t arg0_sig = extractFloatx80Frac(arg);
    int32_t arg0_exp = extractFloatx80Exp(arg);
    bool arg0_sign = extractFloatx80Sign(arg);
    bool asign;
    int32_t dexp, texp, aexp;
    uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
    uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
    uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
    floatx80 t2, accum;

    /*
     * Compute an approximation of arg/(2+arg), with extra precision,
     * as the argument to a polynomial approximation.  The extra
     * precision is only needed for the first term of the
     * approximation, with subsequent terms being significantly
     * smaller; the approximation only uses odd exponents, and the
     * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
     */
    if (arg0_sign) {
        /* arg < 0, so 2 + arg is in [1, 2): denominator exponent 0x3fff. */
        dexp = 0x3fff;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
    } else {
        /* arg >= 0, so 2 + arg is in [2, 4): denominator exponent 0x4000. */
        dexp = 0x4000;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        dsig0 |= 0x8000000000000000ULL;
    }
    texp = arg0_exp - dexp + 0x3ffe;
    rsig0 = arg0_sig;
    rsig1 = 0;
    rsig2 = 0;
    if (dsig0 <= rsig0) {
        shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
        ++texp;
    }
    /* Long division: tsig0:tsig1 ~= (rsig0:rsig1:rsig2) / (dsig0:dsig1). */
    tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
    mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
    sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
           &rsig0, &rsig1, &rsig2);
    while ((int64_t) rsig0 < 0) {
        /* Correct the quotient-digit estimate downwards. */
        --tsig0;
        add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
               &rsig0, &rsig1, &rsig2);
    }
    tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
    /*
     * No need to correct any estimation error in tsig1; even with
     * such error, it is accurate enough.  Now compute the square of
     * that approximation.
     */
    mul128To256(tsig0, tsig1, tsig0, tsig1,
                &t2sig0, &t2sig1, &t2sig2, &t2sig3);
    t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                       texp + texp - 0x3ffe,
                                       t2sig0, t2sig1, &env->fp_status);

    /* Compute the lower parts of the polynomial expansion (Horner form). */
    accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);

    /*
     * The full polynomial expansion is fyl2x_coeff_0 + accum (where
     * accum has much lower magnitude, and so, in particular, carry
     * out of the addition is not possible), multiplied by t.  (This
     * expansion is only accurate to about 70 bits, not 128 bits.)
     */
    aexp = extractFloatx80Exp(fyl2x_coeff_0);
    asign = extractFloatx80Sign(fyl2x_coeff_0);
    shift128RightJamming(extractFloatx80Frac(accum), 0,
                         aexp - extractFloatx80Exp(accum),
                         &asig0, &asig1);
    bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
    bsig1 = 0;
    if (asign == extractFloatx80Sign(accum)) {
        add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    } else {
        sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    }
    /* Multiply by t to compute the required result.
 */
    mul128To256(asig0, asig1, tsig0, tsig1,
                &asig0, &asig1, &asig2, &asig3);
    aexp += texp - 0x3ffe;
    *exp = aexp;
    *sig0 = asig0;
    *sig1 = asig1;
}

/* FYL2XP1: replace ST1 with ST1 * log2(ST0 + 1), then pop ST0. */
void helper_fyl2xp1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_exp > 0x3ffd ||
               (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
                                                  0x95f619980c4336f7ULL :
                                                  0xd413cccfe7799211ULL))) {
        /*
         * Out of range for the instruction (ST0 must have absolute
         * value less than 1 - sqrt(2)/2 = 0.292..., according to
         * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
         * to sqrt(2) - 1, which we allow here), treat as invalid.
         */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
               arg1_exp == 0x7fff) {
        /*
         * One argument is zero, or multiplying by infinity; correct
         * result is exact and can be obtained by multiplying the
         * arguments.
         */
        ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
    } else if (arg0_exp < 0x3fb0) {
        /*
         * Multiplying both arguments and an extra-precision version
         * of log2(e) is sufficiently precise.
         */
        uint64_t sig0, sig1, sig2;
        int32_t exp;
        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
                        &sig0, &sig1, &sig2);
        exp = arg0_exp + 1;
        mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
        exp += arg1_exp - 0x3ffe;
        /* This result is inexact. */
        sig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, exp,
                                            sig0, sig1, &env->fp_status);
    } else {
        int32_t aexp;
        uint64_t asig0, asig1, asig2;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        /* The polynomial approximation requires nearest-even, 80-bit. */
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
        /*
         * Multiply by the second argument to compute the required
         * result.
         */
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
        aexp += arg1_exp - 0x3ffe;
        /* This result is inexact.
 */
        asig1 |= 1;
        /* Round the final result in the caller's rounding mode. */
        env->fp_status.float_rounding_mode = save_mode;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, aexp,
                                            asig0, asig1, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}

/* FYL2X: replace ST1 with ST1 * log2(ST0), then pop ST0. */
void helper_fyl2x(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_sign && !floatx80_is_zero(ST0)) {
        /* log2 of a negative number: invalid operation. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_infinity(ST1)) {
        FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
                                             &env->fp_status);
        switch (cmp) {
        case float_relation_less:
            /* log2(ST0) < 0: flip the sign of the infinity. */
            ST1 = floatx80_chs(ST1);
            break;
        case float_relation_greater:
            /* Result is infinity of the same sign as ST1. */
            break;
        default:
            /* ST0 == 1: inf * 0 is invalid. */
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
            break;
        }
    } else if (floatx80_is_infinity(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else if (arg1_sign) {
            ST1 = floatx80_chs(ST0);
        } else {
            ST1 = ST0;
        }
    } else if (floatx80_is_zero(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else {
            /* Result is infinity with opposite sign to ST1. */
            float_raise(float_flag_divbyzero, &env->fp_status);
            ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
                                0x8000000000000000ULL);
        }
    } else if (floatx80_is_zero(ST1)) {
        if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
            ST1 = floatx80_chs(ST1);
        }
        /* Otherwise, ST1 is already the correct result. */
    } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
        /* log2(1) == 0, signed like ST1. */
        if (arg1_sign) {
            ST1 = floatx80_chs(floatx80_zero);
        } else {
            ST1 = floatx80_zero;
        }
    } else {
        int32_t int_exp;
        floatx80 arg0_m1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        /* The polynomial approximation requires nearest-even, 80-bit. */
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        int_exp = arg0_exp - 0x3fff;
        /* Significand above sqrt(2): choose the higher integer exponent. */
        if (arg0_sig > 0xb504f333f9de6484ULL) {
            ++int_exp;
        }
        arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
                                               &env->fp_status),
                               floatx80_one, &env->fp_status);
        if (floatx80_is_zero(arg0_m1)) {
            /* Exact power of 2; multiply by ST1. */
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
                               ST1, &env->fp_status);
        } else {
            bool asign = extractFloatx80Sign(arg0_m1);
            int32_t aexp;
            uint64_t asig0, asig1, asig2;
            helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
            if (int_exp != 0) {
                /* Add the integer part of the exponent back in. */
                bool isign = (int_exp < 0);
                int32_t iexp;
                uint64_t isig;
                int shift;
                int_exp = isign ? -int_exp : int_exp;
                shift = clz32(int_exp) + 32;
                isig = int_exp;
                isig <<= shift;
                iexp = 0x403e - shift;
                shift128RightJamming(asig0, asig1, iexp - aexp,
                                     &asig0, &asig1);
                if (asign == isign) {
                    add128(isig, 0, asig0, asig1, &asig0, &asig1);
                } else {
                    sub128(isig, 0, asig0, asig1, &asig0, &asig1);
                }
                aexp = iexp;
                asign = isign;
            }
            /*
             * Multiply by the second argument to compute the required
             * result.
2268 */ 2269 if (arg1_exp == 0) { 2270 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2271 } 2272 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2273 aexp += arg1_exp - 0x3ffe; 2274 /* This result is inexact. */ 2275 asig1 |= 1; 2276 env->fp_status.float_rounding_mode = save_mode; 2277 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2278 asign ^ arg1_sign, aexp, 2279 asig0, asig1, &env->fp_status); 2280 } 2281 2282 env->fp_status.floatx80_rounding_precision = save_prec; 2283 } 2284 fpop(env); 2285 merge_exception_flags(env, old_flags); 2286 } 2287 2288 void helper_fsqrt(CPUX86State *env) 2289 { 2290 uint8_t old_flags = save_exception_flags(env); 2291 if (floatx80_is_neg(ST0)) { 2292 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2293 env->fpus |= 0x400; 2294 } 2295 ST0 = floatx80_sqrt(ST0, &env->fp_status); 2296 merge_exception_flags(env, old_flags); 2297 } 2298 2299 void helper_fsincos(CPUX86State *env) 2300 { 2301 double fptemp = floatx80_to_double(env, ST0); 2302 2303 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2304 env->fpus |= 0x400; 2305 } else { 2306 ST0 = double_to_floatx80(env, sin(fptemp)); 2307 fpush(env); 2308 ST0 = double_to_floatx80(env, cos(fptemp)); 2309 env->fpus &= ~0x400; /* C2 <-- 0 */ 2310 /* the above code is for |arg| < 2**63 only */ 2311 } 2312 } 2313 2314 void helper_frndint(CPUX86State *env) 2315 { 2316 uint8_t old_flags = save_exception_flags(env); 2317 ST0 = floatx80_round_to_int(ST0, &env->fp_status); 2318 merge_exception_flags(env, old_flags); 2319 } 2320 2321 void helper_fscale(CPUX86State *env) 2322 { 2323 uint8_t old_flags = save_exception_flags(env); 2324 if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) { 2325 float_raise(float_flag_invalid, &env->fp_status); 2326 ST0 = floatx80_default_nan(&env->fp_status); 2327 } else if (floatx80_is_any_nan(ST1)) { 2328 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2329 float_raise(float_flag_invalid, &env->fp_status); 
2330 } 2331 ST0 = ST1; 2332 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2333 float_raise(float_flag_invalid, &env->fp_status); 2334 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 2335 } 2336 } else if (floatx80_is_infinity(ST1) && 2337 !floatx80_invalid_encoding(ST0) && 2338 !floatx80_is_any_nan(ST0)) { 2339 if (floatx80_is_neg(ST1)) { 2340 if (floatx80_is_infinity(ST0)) { 2341 float_raise(float_flag_invalid, &env->fp_status); 2342 ST0 = floatx80_default_nan(&env->fp_status); 2343 } else { 2344 ST0 = (floatx80_is_neg(ST0) ? 2345 floatx80_chs(floatx80_zero) : 2346 floatx80_zero); 2347 } 2348 } else { 2349 if (floatx80_is_zero(ST0)) { 2350 float_raise(float_flag_invalid, &env->fp_status); 2351 ST0 = floatx80_default_nan(&env->fp_status); 2352 } else { 2353 ST0 = (floatx80_is_neg(ST0) ? 2354 floatx80_chs(floatx80_infinity) : 2355 floatx80_infinity); 2356 } 2357 } 2358 } else { 2359 int n; 2360 FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision; 2361 uint8_t save_flags = get_float_exception_flags(&env->fp_status); 2362 set_float_exception_flags(0, &env->fp_status); 2363 n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); 2364 set_float_exception_flags(save_flags, &env->fp_status); 2365 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2366 ST0 = floatx80_scalbn(ST0, n, &env->fp_status); 2367 env->fp_status.floatx80_rounding_precision = save; 2368 } 2369 merge_exception_flags(env, old_flags); 2370 } 2371 2372 void helper_fsin(CPUX86State *env) 2373 { 2374 double fptemp = floatx80_to_double(env, ST0); 2375 2376 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2377 env->fpus |= 0x400; 2378 } else { 2379 ST0 = double_to_floatx80(env, sin(fptemp)); 2380 env->fpus &= ~0x400; /* C2 <-- 0 */ 2381 /* the above code is for |arg| < 2**53 only */ 2382 } 2383 } 2384 2385 void helper_fcos(CPUX86State *env) 2386 { 2387 double fptemp = floatx80_to_double(env, ST0); 2388 2389 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2390 
env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}

/* FXAM: classify ST0 into the C3..C0 condition bits. */
void helper_fxam_ST0(CPUX86State *env)
{
    CPU_LDoubleU temp;
    int expdif;

    temp.d = ST0;

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (SIGND(temp)) {
        env->fpus |= 0x200; /* C1 <-- 1 */
    }

    if (env->fptags[env->fpstt]) {
        env->fpus |= 0x4100; /* Empty */
        return;
    }

    expdif = EXPD(temp);
    if (expdif == MAXEXPD) {
        if (MANTD(temp) == 0x8000000000000000ULL) {
            env->fpus |= 0x500; /* Infinity */
        } else if (MANTD(temp) & 0x8000000000000000ULL) {
            env->fpus |= 0x100; /* NaN */
        }
    } else if (expdif == 0) {
        if (MANTD(temp) == 0) {
            env->fpus |= 0x4000; /* Zero */
        } else {
            env->fpus |= 0x4400; /* Denormal */
        }
    } else if (MANTD(temp) & 0x8000000000000000ULL) {
        env->fpus |= 0x400; /* Normal */
    }
}

/*
 * Store the FPU environment (control/status/tag words plus the
 * instruction/operand pointers) at ptr, in 16- or 32-bit layout.
 */
static void do_fstenv(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    /* Merge the current TOP into the status word. */
    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       || (mant & (1LL << 63)) == 0) {
                /* NaNs, infinity, denormal */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        access_stl(ac, ptr, env->fpuc);
        access_stl(ac, ptr + 4, fpus);
        access_stl(ac, ptr + 8, fptag);
        access_stl(ac, ptr + 12, env->fpip); /* fpip */
        access_stl(ac, ptr + 16, env->fpcs); /* fpcs */
        access_stl(ac, ptr + 20, env->fpdp); /* fpoo */
        access_stl(ac, ptr + 24, env->fpds); /* fpos */
    } else {
        /* 16 bit */
        access_stw(ac, ptr, env->fpuc);
        access_stw(ac, ptr + 2, fpus);
        access_stw(ac, ptr + 4, fptag);
        access_stw(ac, ptr + 6, env->fpip);
        access_stw(ac, ptr + 8, env->fpcs);
        access_stw(ac, ptr + 10, env->fpdp);
        access_stw(ac, ptr + 12, env->fpds);
    }
}

/* FSTENV: store the FPU environment to guest memory. */
void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
    X86Access ac;

    access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC());
    do_fstenv(&ac, ptr, data32);
}

/* Install a new FPU status word, extracting TOP and the busy flag. */
static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
{
    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800 & ~FPUS_B;
    env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
#if !defined(CONFIG_USER_ONLY)
    if (!(env->fpus & FPUS_SE)) {
        /*
         * Here the processor deasserts FERR#; in response, the chipset deasserts
         * IGNNE#.
2499 */ 2500 cpu_clear_ignne(); 2501 } 2502 #endif 2503 } 2504 2505 static void do_fldenv(X86Access *ac, target_ulong ptr, int data32) 2506 { 2507 int i, fpus, fptag; 2508 CPUX86State *env = ac->env; 2509 2510 cpu_set_fpuc(env, access_ldw(ac, ptr)); 2511 fpus = access_ldw(ac, ptr + (2 << data32)); 2512 fptag = access_ldw(ac, ptr + (4 << data32)); 2513 2514 cpu_set_fpus(env, fpus); 2515 for (i = 0; i < 8; i++) { 2516 env->fptags[i] = ((fptag & 3) == 3); 2517 fptag >>= 2; 2518 } 2519 } 2520 2521 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) 2522 { 2523 X86Access ac; 2524 2525 access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC()); 2526 do_fldenv(&ac, ptr, data32); 2527 } 2528 2529 static void do_fsave(X86Access *ac, target_ulong ptr, int data32) 2530 { 2531 CPUX86State *env = ac->env; 2532 2533 do_fstenv(ac, ptr, data32); 2534 ptr += 14 << data32; 2535 2536 for (int i = 0; i < 8; i++) { 2537 floatx80 tmp = ST(i); 2538 do_fstt(ac, ptr, tmp); 2539 ptr += 10; 2540 } 2541 2542 do_fninit(env); 2543 } 2544 2545 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) 2546 { 2547 int size = (14 << data32) + 80; 2548 X86Access ac; 2549 2550 access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, GETPC()); 2551 do_fsave(&ac, ptr, data32); 2552 } 2553 2554 static void do_frstor(X86Access *ac, target_ulong ptr, int data32) 2555 { 2556 CPUX86State *env = ac->env; 2557 2558 do_fldenv(ac, ptr, data32); 2559 ptr += 14 << data32; 2560 2561 for (int i = 0; i < 8; i++) { 2562 floatx80 tmp = do_fldt(ac, ptr); 2563 ST(i) = tmp; 2564 ptr += 10; 2565 } 2566 } 2567 2568 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) 2569 { 2570 int size = (14 << data32) + 80; 2571 X86Access ac; 2572 2573 access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, GETPC()); 2574 do_frstor(&ac, ptr, data32); 2575 } 2576 2577 #define XO(X) offsetof(X86XSaveArea, X) 2578 2579 static void do_xsave_fpu(X86Access *ac, target_ulong ptr) 2580 { 2581 CPUX86State 
*env = ac->env;
    int fpus, fptag, i;
    target_ulong addr;

    /* Merge the current TOP into the status word. */
    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 0; i < 8; i++) {
        fptag |= (env->fptags[i] << i);
    }

    access_stw(ac, ptr + XO(legacy.fcw), env->fpuc);
    access_stw(ac, ptr + XO(legacy.fsw), fpus);
    /* The FXSAVE tag word is abridged: one "valid" bit per register. */
    access_stw(ac, ptr + XO(legacy.ftw), fptag ^ 0xff);

    /* In 32-bit mode this is eip, sel, dp, sel.
       In 64-bit mode this is rip, rdp.
       But in either case we don't write actual data, just zeros.  */
    access_stq(ac, ptr + XO(legacy.fpip), 0); /* eip+sel; rip */
    access_stq(ac, ptr + XO(legacy.fpdp), 0); /* edp+sel; rdp */

    addr = ptr + XO(legacy.fpregs);

    /* Each x87 register occupies a 16-byte slot. */
    for (i = 0; i < 8; i++) {
        floatx80 tmp = ST(i);
        do_fstt(ac, addr, tmp);
        addr += 16;
    }
}

/* Store MXCSR and its mask for FXSAVE/XSAVE. */
static void do_xsave_mxcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    update_mxcsr_from_sse_status(env);
    access_stl(ac, ptr + XO(legacy.mxcsr), env->mxcsr);
    access_stl(ac, ptr + XO(legacy.mxcsr_mask), 0x0000ffff);
}

/* Store the low 128 bits of each XMM register. */
static void do_xsave_sse(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, nb_xmm_regs;
    target_ulong addr;

    /* Only 64-bit code has access to XMM8..XMM15. */
    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + XO(legacy.xmm_regs);
    for (i = 0; i < nb_xmm_regs; i++) {
        access_stq(ac, addr, env->xmm_regs[i].ZMM_Q(0));
        access_stq(ac, addr + 8, env->xmm_regs[i].ZMM_Q(1));
        addr += 16;
    }
}

/* Store the upper 128 bits (YMM halves) of each vector register. */
static void do_xsave_ymmh(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, nb_xmm_regs;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
        access_stq(ac, ptr, env->xmm_regs[i].ZMM_Q(2));
        access_stq(ac, ptr + 8, env->xmm_regs[i].ZMM_Q(3));
    }
}

/* Store the four MPX bound registers. */
static void do_xsave_bndregs(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        access_stq(ac, addr, env->bnd_regs[i].lb);
        access_stq(ac, addr + 8, env->bnd_regs[i].ub);
    }
}

/* Store the MPX configuration and status registers. */
static void do_xsave_bndcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
               env->bndcs_regs.cfgu);
    access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
               env->bndcs_regs.sts);
}

/* Store the protection-key rights register. */
static void do_xsave_pkru(X86Access *ac, target_ulong ptr)
{
    access_stq(ac, ptr, ac->env->pkru);
}

/* FXSAVE body: legacy FP state, plus MXCSR/XMM when CR4.OSFXSR is set. */
static void do_fxsave(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    do_xsave_fpu(ac, ptr);
    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xsave_mxcsr(ac, ptr);
        /* Fast FXSAVE leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xsave_sse(ac, ptr);
        }
    }
}

/* FXSAVE: store the extended FP/SSE state to guest memory. */
void helper_fxsave(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();
    X86Access ac;

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
                   MMU_DATA_STORE, ra);
    do_fxsave(&ac, ptr);
}

/* Return the XINUSE bitmap (which state components are live). */
static uint64_t get_xinuse(CPUX86State *env)
{
    uint64_t inuse = -1;

    /* For the most part, we don't track XINUSE.  We could calculate it
       here for all components, but it's probably less work to simply
       indicate in use.  That said, the state of BNDREGS is important
       enough to track in HFLAGS, so we might as well use that here.
 */
    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
        inuse &= ~XSTATE_BNDREGS_MASK;
    }
    return inuse;
}

/*
 * Write the XSAVE components selected by opt, and fold inuse into the
 * memory XSTATE_BV field for the components selected by rfbm.
 */
static void do_xsave_access(X86Access *ac, target_ulong ptr, uint64_t rfbm,
                            uint64_t inuse, uint64_t opt)
{
    uint64_t old_bv, new_bv;

    if (opt & XSTATE_FP_MASK) {
        do_xsave_fpu(ac, ptr);
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
        do_xsave_mxcsr(ac, ptr);
    }
    if (opt & XSTATE_SSE_MASK) {
        do_xsave_sse(ac, ptr);
    }
    if (opt & XSTATE_YMM_MASK) {
        do_xsave_ymmh(ac, ptr + XO(avx_state));
    }
    if (opt & XSTATE_BNDREGS_MASK) {
        do_xsave_bndregs(ac, ptr + XO(bndreg_state));
    }
    if (opt & XSTATE_BNDCSR_MASK) {
        do_xsave_bndcsr(ac, ptr + XO(bndcsr_state));
    }
    if (opt & XSTATE_PKRU_MASK) {
        do_xsave_pkru(ac, ptr + XO(pkru_state));
    }

    /* Update the XSTATE_BV field.  */
    old_bv = access_ldq(ac, ptr + XO(header.xstate_bv));
    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
    access_stq(ac, ptr + XO(header.xstate_bv), new_bv);
}

/* Common #UD/#GP checks for the XSAVE/XRSTOR instruction family. */
static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }
}

/* Common implementation of XSAVE and XSAVEOPT. */
static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
                     uint64_t inuse, uint64_t opt, uintptr_t ra)
{
    X86Access ac;
    unsigned size;

    do_xsave_chk(env, ptr, ra);

    /* Never save anything not enabled by XCR0.  */
    rfbm &= env->xcr0;
    opt &= rfbm;
    size = xsave_area_size(opt, false);

    access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra);
    do_xsave_access(&ac, ptr, rfbm, inuse, opt);
}

void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    do_xsave(env, ptr, rfbm, get_xinuse(env), rfbm, GETPC());
}

void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    /* XSAVEOPT only writes components that are actually in use. */
    uint64_t inuse = get_xinuse(env);
    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
}

/* Load the legacy x87 state for FXRSTOR/XRSTOR. */
static void do_xrstor_fpu(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, fpuc, fpus, fptag;
    target_ulong addr;

    fpuc = access_ldw(ac, ptr + XO(legacy.fcw));
    fpus = access_ldw(ac, ptr + XO(legacy.fsw));
    fptag = access_ldw(ac, ptr + XO(legacy.ftw));
    cpu_set_fpuc(env, fpuc);
    cpu_set_fpus(env, fpus);

    /* The stored tag word is abridged: one "valid" bit per register. */
    fptag ^= 0xff;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag >> i) & 1);
    }

    addr = ptr + XO(legacy.fpregs);

    /* Each x87 register occupies a 16-byte slot. */
    for (i = 0; i < 8; i++) {
        floatx80 tmp = do_fldt(ac, addr);
        ST(i) = tmp;
        addr += 16;
    }
}

/* Load MXCSR for FXRSTOR/XRSTOR. */
static void do_xrstor_mxcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    cpu_set_mxcsr(env, access_ldl(ac, ptr + XO(legacy.mxcsr)));
}

/* Load the low 128 bits of each XMM register. */
static void do_xrstor_sse(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, nb_xmm_regs;
    target_ulong addr;

    /* Only 64-bit code has access to XMM8..XMM15. */
    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + XO(legacy.xmm_regs);
    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(0) = access_ldq(ac, addr);
        env->xmm_regs[i].ZMM_Q(1) = access_ldq(ac, addr + 8);
        addr += 16;
    }
}

/* Zero the low 128 bits of each XMM register (XRSTOR init state). */
static void do_clear_sse(CPUX86State *env)
{
    int i, nb_xmm_regs;

    if (env->hflags & 
HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(0) = 0;
        env->xmm_regs[i].ZMM_Q(1) = 0;
    }
}

/* Load the upper 128 bits (YMM halves) of each vector register. */
static void do_xrstor_ymmh(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, nb_xmm_regs;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
        env->xmm_regs[i].ZMM_Q(2) = access_ldq(ac, ptr);
        env->xmm_regs[i].ZMM_Q(3) = access_ldq(ac, ptr + 8);
    }
}

/* Zero the upper 128 bits of each vector register (XRSTOR init state). */
static void do_clear_ymmh(CPUX86State *env)
{
    int i, nb_xmm_regs;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(2) = 0;
        env->xmm_regs[i].ZMM_Q(3) = 0;
    }
}

/* Load the four MPX bound registers. */
static void do_xrstor_bndregs(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        env->bnd_regs[i].lb = access_ldq(ac, addr);
        env->bnd_regs[i].ub = access_ldq(ac, addr + 8);
    }
}

/* Load the MPX configuration and status registers. */
static void do_xrstor_bndcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    /* FIXME: Extend highest implemented bit of linear address.  */
    env->bndcs_regs.cfgu
        = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu));
    env->bndcs_regs.sts
        = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts));
}

/* Load the protection-key rights register. */
static void do_xrstor_pkru(X86Access *ac, target_ulong ptr)
{
    ac->env->pkru = access_ldq(ac, ptr);
}

/* FXRSTOR body: legacy FP state, plus MXCSR/XMM when CR4.OSFXSR is set. */
static void do_fxrstor(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    do_xrstor_fpu(ac, ptr);
    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xrstor_mxcsr(ac, ptr);
        /* Fast FXRSTOR leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xrstor_sse(ac, ptr);
        }
    }
}

/* FXRSTOR: load the extended FP/SSE state from guest memory. */
void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();
    X86Access ac;

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
                   MMU_DATA_LOAD, ra);
    do_fxrstor(&ac, ptr);
}

/* Validate the XSAVE header for XRSTOR; XSTATE_BV is returned via *pxsbv. */
static bool valid_xrstor_header(X86Access *ac, uint64_t *pxsbv,
                                target_ulong ptr)
{
    uint64_t xstate_bv, xcomp_bv, reserve0;

    xstate_bv = access_ldq(ac, ptr + XO(header.xstate_bv));
    xcomp_bv = access_ldq(ac, ptr + XO(header.xcomp_bv));
    reserve0 = access_ldq(ac, ptr + XO(header.reserve0));
    *pxsbv = xstate_bv;

    /*
     * XCOMP_BV bit 63 indicates compact form, which we do not support,
     * and thus must raise #GP.  That leaves us in standard form.
     * In standard form, bytes 23:8 must be zero -- which is both
     * XCOMP_BV and the following 64-bit field.
     */
    if (xcomp_bv || reserve0) {
        return false;
    }

    /* The XSTATE_BV field must not set bits not present in XCR0.
*/ 2984 return (xstate_bv & ~ac->env->xcr0) == 0; 2985 } 2986 2987 static void do_xrstor(X86Access *ac, target_ulong ptr, 2988 uint64_t rfbm, uint64_t xstate_bv) 2989 { 2990 CPUX86State *env = ac->env; 2991 2992 if (rfbm & XSTATE_FP_MASK) { 2993 if (xstate_bv & XSTATE_FP_MASK) { 2994 do_xrstor_fpu(ac, ptr); 2995 } else { 2996 do_fninit(env); 2997 memset(env->fpregs, 0, sizeof(env->fpregs)); 2998 } 2999 } 3000 if (rfbm & XSTATE_SSE_MASK) { 3001 /* Note that the standard form of XRSTOR loads MXCSR from memory 3002 whether or not the XSTATE_BV bit is set. */ 3003 do_xrstor_mxcsr(ac, ptr); 3004 if (xstate_bv & XSTATE_SSE_MASK) { 3005 do_xrstor_sse(ac, ptr); 3006 } else { 3007 do_clear_sse(env); 3008 } 3009 } 3010 if (rfbm & XSTATE_YMM_MASK) { 3011 if (xstate_bv & XSTATE_YMM_MASK) { 3012 do_xrstor_ymmh(ac, ptr + XO(avx_state)); 3013 } else { 3014 do_clear_ymmh(env); 3015 } 3016 } 3017 if (rfbm & XSTATE_BNDREGS_MASK) { 3018 if (xstate_bv & XSTATE_BNDREGS_MASK) { 3019 do_xrstor_bndregs(ac, ptr + XO(bndreg_state)); 3020 env->hflags |= HF_MPX_IU_MASK; 3021 } else { 3022 memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); 3023 env->hflags &= ~HF_MPX_IU_MASK; 3024 } 3025 } 3026 if (rfbm & XSTATE_BNDCSR_MASK) { 3027 if (xstate_bv & XSTATE_BNDCSR_MASK) { 3028 do_xrstor_bndcsr(ac, ptr + XO(bndcsr_state)); 3029 } else { 3030 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); 3031 } 3032 cpu_sync_bndcs_hflags(env); 3033 } 3034 if (rfbm & XSTATE_PKRU_MASK) { 3035 uint64_t old_pkru = env->pkru; 3036 if (xstate_bv & XSTATE_PKRU_MASK) { 3037 do_xrstor_pkru(ac, ptr + XO(pkru_state)); 3038 } else { 3039 env->pkru = 0; 3040 } 3041 if (env->pkru != old_pkru) { 3042 CPUState *cs = env_cpu(env); 3043 tlb_flush(cs); 3044 } 3045 } 3046 } 3047 3048 #undef XO 3049 3050 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 3051 { 3052 uintptr_t ra = GETPC(); 3053 X86Access ac; 3054 uint64_t xstate_bv; 3055 unsigned size, size_ext; 3056 3057 do_xsave_chk(env, ptr, ra); 3058 3059 
/* Begin with just the minimum size to validate the header. */ 3060 size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader); 3061 access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra); 3062 if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) { 3063 raise_exception_ra(env, EXCP0D_GPF, ra); 3064 } 3065 3066 rfbm &= env->xcr0; 3067 size_ext = xsave_area_size(rfbm & xstate_bv, false); 3068 if (size < size_ext) { 3069 /* TODO: See if existing page probe has covered extra size. */ 3070 access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra); 3071 } 3072 3073 do_xrstor(&ac, ptr, rfbm, xstate_bv); 3074 } 3075 3076 #if defined(CONFIG_USER_ONLY) 3077 void cpu_x86_fsave(CPUX86State *env, void *host, size_t len) 3078 { 3079 X86Access ac = { 3080 .haddr1 = host, 3081 .size = 4 * 7 + 8 * 10, 3082 .env = env, 3083 }; 3084 3085 assert(ac.size <= len); 3086 do_fsave(&ac, 0, true); 3087 } 3088 3089 void cpu_x86_frstor(CPUX86State *env, void *host, size_t len) 3090 { 3091 X86Access ac = { 3092 .haddr1 = host, 3093 .size = 4 * 7 + 8 * 10, 3094 .env = env, 3095 }; 3096 3097 assert(ac.size <= len); 3098 do_frstor(&ac, 0, true); 3099 } 3100 3101 void cpu_x86_fxsave(CPUX86State *env, void *host, size_t len) 3102 { 3103 X86Access ac = { 3104 .haddr1 = host, 3105 .size = sizeof(X86LegacyXSaveArea), 3106 .env = env, 3107 }; 3108 3109 assert(ac.size <= len); 3110 do_fxsave(&ac, 0); 3111 } 3112 3113 void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len) 3114 { 3115 X86Access ac = { 3116 .haddr1 = host, 3117 .size = sizeof(X86LegacyXSaveArea), 3118 .env = env, 3119 }; 3120 3121 assert(ac.size <= len); 3122 do_fxrstor(&ac, 0); 3123 } 3124 3125 void cpu_x86_xsave(CPUX86State *env, void *host, size_t len, uint64_t rfbm) 3126 { 3127 X86Access ac = { 3128 .haddr1 = host, 3129 .env = env, 3130 }; 3131 3132 /* 3133 * Since this is only called from user-level signal handling, 3134 * we should have done the job correctly there. 
3135 */ 3136 assert((rfbm & ~env->xcr0) == 0); 3137 ac.size = xsave_area_size(rfbm, false); 3138 assert(ac.size <= len); 3139 do_xsave_access(&ac, 0, rfbm, get_xinuse(env), rfbm); 3140 } 3141 3142 bool cpu_x86_xrstor(CPUX86State *env, void *host, size_t len, uint64_t rfbm) 3143 { 3144 X86Access ac = { 3145 .haddr1 = host, 3146 .env = env, 3147 }; 3148 uint64_t xstate_bv; 3149 3150 /* 3151 * Since this is only called from user-level signal handling, 3152 * we should have done the job correctly there. 3153 */ 3154 assert((rfbm & ~env->xcr0) == 0); 3155 ac.size = xsave_area_size(rfbm, false); 3156 assert(ac.size <= len); 3157 3158 if (!valid_xrstor_header(&ac, &xstate_bv, 0)) { 3159 return false; 3160 } 3161 do_xrstor(&ac, 0, rfbm, xstate_bv); 3162 return true; 3163 } 3164 #endif 3165 3166 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) 3167 { 3168 /* The OS must have enabled XSAVE. */ 3169 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 3170 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 3171 } 3172 3173 switch (ecx) { 3174 case 0: 3175 return env->xcr0; 3176 case 1: 3177 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) { 3178 return env->xcr0 & get_xinuse(env); 3179 } 3180 break; 3181 } 3182 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 3183 } 3184 3185 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) 3186 { 3187 uint32_t dummy, ena_lo, ena_hi; 3188 uint64_t ena; 3189 3190 /* The OS must have enabled XSAVE. */ 3191 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 3192 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 3193 } 3194 3195 /* Only XCR0 is defined at present; the FPU may not be disabled. */ 3196 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) { 3197 goto do_gpf; 3198 } 3199 3200 /* SSE can be disabled, but only if AVX is disabled too. */ 3201 if ((mask & (XSTATE_SSE_MASK | XSTATE_YMM_MASK)) == XSTATE_YMM_MASK) { 3202 goto do_gpf; 3203 } 3204 3205 /* Disallow enabling unimplemented features. 
*/ 3206 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); 3207 ena = ((uint64_t)ena_hi << 32) | ena_lo; 3208 if (mask & ~ena) { 3209 goto do_gpf; 3210 } 3211 3212 /* Disallow enabling only half of MPX. */ 3213 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK))) 3214 & XSTATE_BNDCSR_MASK) { 3215 goto do_gpf; 3216 } 3217 3218 env->xcr0 = mask; 3219 cpu_sync_bndcs_hflags(env); 3220 cpu_sync_avx_hflag(env); 3221 return; 3222 3223 do_gpf: 3224 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 3225 } 3226 3227 /* MMX/SSE */ 3228 /* XXX: optimize by storing fptt and fptags in the static cpu state */ 3229 3230 #define SSE_DAZ 0x0040 3231 #define SSE_RC_SHIFT 13 3232 #define SSE_RC_MASK (3 << SSE_RC_SHIFT) 3233 #define SSE_FZ 0x8000 3234 3235 void update_mxcsr_status(CPUX86State *env) 3236 { 3237 uint32_t mxcsr = env->mxcsr; 3238 int rnd_type; 3239 3240 /* set rounding mode */ 3241 rnd_type = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT; 3242 set_x86_rounding_mode(rnd_type, &env->sse_status); 3243 3244 /* Set exception flags. */ 3245 set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | 3246 (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | 3247 (mxcsr & FPUS_OE ? float_flag_overflow : 0) | 3248 (mxcsr & FPUS_UE ? float_flag_underflow : 0) | 3249 (mxcsr & FPUS_PE ? float_flag_inexact : 0), 3250 &env->sse_status); 3251 3252 /* set denormals are zero */ 3253 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); 3254 3255 /* set flush to zero */ 3256 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status); 3257 } 3258 3259 void update_mxcsr_from_sse_status(CPUX86State *env) 3260 { 3261 uint8_t flags = get_float_exception_flags(&env->sse_status); 3262 /* 3263 * The MXCSR denormal flag has opposite semantics to 3264 * float_flag_input_denormal_flushed (the softfloat code sets that flag 3265 * only when flushing input denormals to zero, but SSE sets it 3266 * only when not flushing them to zero), so is not converted 3267 * here. 
3268 */ 3269 env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | 3270 (flags & float_flag_divbyzero ? FPUS_ZE : 0) | 3271 (flags & float_flag_overflow ? FPUS_OE : 0) | 3272 (flags & float_flag_underflow ? FPUS_UE : 0) | 3273 (flags & float_flag_inexact ? FPUS_PE : 0) | 3274 (flags & float_flag_output_denormal_flushed ? FPUS_UE | FPUS_PE : 3275 0)); 3276 } 3277 3278 void helper_update_mxcsr(CPUX86State *env) 3279 { 3280 update_mxcsr_from_sse_status(env); 3281 } 3282 3283 void helper_ldmxcsr(CPUX86State *env, uint32_t val) 3284 { 3285 cpu_set_mxcsr(env, val); 3286 } 3287 3288 void helper_enter_mmx(CPUX86State *env) 3289 { 3290 env->fpstt = 0; 3291 *(uint32_t *)(env->fptags) = 0; 3292 *(uint32_t *)(env->fptags + 4) = 0; 3293 } 3294 3295 void helper_emms(CPUX86State *env) 3296 { 3297 /* set to empty state */ 3298 *(uint32_t *)(env->fptags) = 0x01010101; 3299 *(uint32_t *)(env->fptags + 4) = 0x01010101; 3300 } 3301 3302 #define SHIFT 0 3303 #include "ops_sse.h" 3304 3305 #define SHIFT 1 3306 #include "ops_sse.h" 3307 3308 #define SHIFT 2 3309 #include "ops_sse.h" 3310