1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "tcg-cpu.h"
24 #include "exec/cputlb.h"
25 #include "accel/tcg/cpu-ldst.h"
26 #include "exec/helper-proto.h"
27 #include "fpu/softfloat.h"
28 #include "fpu/softfloat-macros.h"
29 #include "helper-tcg.h"
30 #include "access.h"
31
32 /* float macros */
33 #define FT0 (env->ft0)
34 #define ST0 (env->fpregs[env->fpstt].d)
35 #define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d)
36 #define ST1 ST(1)
37
38 #define FPU_RC_SHIFT 10
39 #define FPU_RC_MASK (3 << FPU_RC_SHIFT)
40 #define FPU_RC_NEAR 0x000
41 #define FPU_RC_DOWN 0x400
42 #define FPU_RC_UP 0x800
43 #define FPU_RC_CHOP 0xc00
44
45 #define MAXTAN 9223372036854775808.0
46
47 /* the following deal with x86 long double-precision numbers */
48 #define MAXEXPD 0x7fff
49 #define EXPBIAS 16383
50 #define EXPD(fp) (fp.l.upper & 0x7fff)
51 #define SIGND(fp) ((fp.l.upper) & 0x8000)
52 #define MANTD(fp) (fp.l.lower)
53 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
54
55 #define FPUS_IE (1 << 0)
56 #define FPUS_DE (1 << 1)
57 #define FPUS_ZE (1 << 2)
58 #define FPUS_OE (1 << 3)
59 #define FPUS_UE (1 << 4)
60 #define FPUS_PE (1 << 5)
61 #define FPUS_SF (1 << 6)
62 #define FPUS_SE (1 << 7)
63 #define FPUS_B (1 << 15)
64
65 #define FPUC_EM 0x3f
66
67 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
68 #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
69 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
70 #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
71 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
72 #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
73 #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
74 #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
75
/* Push the x87 register stack: decrement TOP and mark the new ST0 valid. */
static inline void fpush(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fptags[env->fpstt] = 0; /* validate stack entry */
}

/* Pop the x87 register stack: mark the current ST0 empty, then increment TOP. */
static inline void fpop(CPUX86State *env)
{
    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
    env->fpstt = (env->fpstt + 1) & 7;
}
87
/*
 * Load an 80-bit extended-precision value from guest memory:
 * 64-bit significand at ptr, 16-bit sign/exponent word at ptr + 8.
 */
static floatx80 do_fldt(X86Access *ac, target_ulong ptr)
{
    CPU_LDoubleU temp;

    temp.l.lower = access_ldq(ac, ptr);
    temp.l.upper = access_ldw(ac, ptr + 8);
    return temp.d;
}

/*
 * Store an 80-bit extended-precision value to guest memory, using the
 * same layout as do_fldt() above.
 */
static void do_fstt(X86Access *ac, target_ulong ptr, floatx80 f)
{
    CPU_LDoubleU temp;

    temp.d = f;
    access_stq(ac, ptr, temp.l.lower);
    access_stw(ac, ptr + 8, temp.l.upper);
}
105
106 /* x87 FPU helpers */
107
/*
 * Convert a floatx80 to the host's double, going through softfloat's
 * float64 and a union to avoid strict-aliasing problems.
 * Note: may set softfloat exception flags in env->fp_status.
 */
static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.f64 = floatx80_to_float64(a, &env->fp_status);
    return u.d;
}

/* Inverse of floatx80_to_double(): host double -> floatx80. */
static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.d = a;
    return float64_to_floatx80(u.f64, &env->fp_status);
}
129
/*
 * Set exception bits (mask) in the FPU status word.  If any pending
 * exception is unmasked in the control word, also raise the error
 * summary (ES) and busy (B) status bits.
 */
static void fpu_set_exception(CPUX86State *env, int mask)
{
    env->fpus |= mask;
    if (env->fpus & (~env->fpuc & FPUC_EM)) {
        env->fpus |= FPUS_SE | FPUS_B;
    }
}
137
/* One-time (per CPU reset) setup of the softfloat status words. */
void cpu_init_fp_statuses(CPUX86State *env)
{
    /*
     * Initialise the non-runtime-varying fields of the various
     * float_status words to x86 behaviour. This must be called at
     * CPU reset because the float_status words are in the
     * "zeroed on reset" portion of the CPU state struct.
     * Fields in float_status that vary under guest control are set
     * via the codepath for setting that register, eg cpu_set_fpuc().
     */
    /*
     * Use x87 NaN propagation rules:
     * SNaN + QNaN => return the QNaN
     * two SNaNs => return the one with the larger significand, silenced
     * two QNaNs => return the one with the larger significand
     * SNaN and a non-NaN => return the SNaN, silenced
     * QNaN and a non-NaN => return the QNaN
     *
     * If we get down to comparing significands and they are the same,
     * return the NaN with the positive sign bit (if any).
     */
    set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
    /*
     * TODO: These are incorrect: the x86 Software Developer's Manual vol 1
     * section 4.8.3.5 "Operating on SNaNs and QNaNs" says that the
     * "larger significand" behaviour is only used for x87 FPU operations.
     * For SSE the required behaviour is to always return the first NaN,
     * which is float_2nan_prop_ab.
     *
     * mmx_status is used only for the AMD 3DNow! instructions, which
     * are documented in the "3DNow! Technology Manual" as not supporting
     * NaNs or infinities as inputs. The result of passing two NaNs is
     * documented as "undefined", so we can do what we choose.
     * (Strictly there is some behaviour we don't implement correctly
     * for these "unsupported" NaN and Inf values, like "NaN * 0 == 0".)
     */
    set_float_2nan_prop_rule(float_2nan_prop_x87, &env->mmx_status);
    set_float_2nan_prop_rule(float_2nan_prop_x87, &env->sse_status);
    /*
     * Only SSE has multiply-add instructions. In the SDM Section 14.5.2
     * "Fused-Multiply-ADD (FMA) Numeric Behavior" the NaN handling is
     * specified -- for 0 * inf + NaN the input NaN is selected, and if
     * there are multiple input NaNs they are selected in the order a, b, c.
     * We also do not raise Invalid for the 0 * inf + (Q)NaN case.
     */
    set_float_infzeronan_rule(float_infzeronan_dnan_never |
                              float_infzeronan_suppress_invalid,
                              &env->sse_status);
    set_float_3nan_prop_rule(float_3nan_prop_abc, &env->sse_status);
    /* Default NaN: sign bit set, most significant frac bit set */
    set_float_default_nan_pattern(0b11000000, &env->fp_status);
    set_float_default_nan_pattern(0b11000000, &env->mmx_status);
    set_float_default_nan_pattern(0b11000000, &env->sse_status);
    /*
     * x86 does flush-to-zero detection after rounding (the SDM
     * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush
     * when we detect underflow, which x86 does after rounding).
     */
    set_float_ftz_detection(float_ftz_after_rounding, &env->fp_status);
    set_float_ftz_detection(float_ftz_after_rounding, &env->mmx_status);
    set_float_ftz_detection(float_ftz_after_rounding, &env->sse_status);
}
200
/*
 * Save and clear the accumulated softfloat exception flags, so the
 * following operation's flags can be observed in isolation.  Pair with
 * merge_exception_flags() below.
 */
static inline int save_exception_flags(CPUX86State *env)
{
    int old_flags = get_float_exception_flags(&env->fp_status);
    set_float_exception_flags(0, &env->fp_status);
    return old_flags;
}

/*
 * Fold the flags raised since save_exception_flags() into the x87
 * status word (FPUS), then restore the previously saved flags on top
 * of them in fp_status.
 */
static void merge_exception_flags(CPUX86State *env, int old_flags)
{
    int new_flags = get_float_exception_flags(&env->fp_status);
    float_raise(old_flags, &env->fp_status);
    fpu_set_exception(env,
                      ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
                       (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
                       (new_flags & float_flag_overflow ? FPUS_OE : 0) |
                       (new_flags & float_flag_underflow ? FPUS_UE : 0) |
                       (new_flags & float_flag_inexact ? FPUS_PE : 0) |
                       (new_flags & float_flag_input_denormal_used ? FPUS_DE : 0)));
}

/* floatx80 division with x87 status-word exception reporting. */
static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
{
    int old_flags = save_exception_flags(env);
    floatx80 ret = floatx80_div(a, b, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return ret;
}
228
/*
 * Deliver a pending x87 exception: as #MF if CR0.NE is set, otherwise
 * (system emulation only) via the legacy FERR# interrupt path.
 */
static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
{
    if (env->cr[0] & CR0_NE_MASK) {
        raise_exception_ra(env, EXCP10_COPR, retaddr);
    }
#if !defined(CONFIG_USER_ONLY)
    else {
        fpu_check_raise_ferr_irq(env);
    }
#endif
}
240
/* Load a 32-bit float (raw bits in val) into the FT0 scratch register. */
void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
    int old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.i = val;
    FT0 = float32_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* Load a 64-bit double (raw bits in val) into the FT0 scratch register. */
void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
    int old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.i = val;
    FT0 = float64_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* Load a signed 32-bit integer into FT0 (always exact, no flag handling). */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}
271
/* FLD m32fp: push a 32-bit float (raw bits in val) onto the x87 stack. */
void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
    int old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float32 f;
        uint32_t i;
    } u;

    /* Write the value before updating TOP so ST0 is consistent. */
    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

/* FLD m64fp: push a 64-bit double (raw bits in val) onto the x87 stack. */
void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
    int old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float64 f;
        uint64_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}
305
/*
 * Temporarily force extended (80-bit) rounding precision; returns the
 * previous precision so the caller can restore it afterwards.
 */
static FloatX80RoundPrec tmp_maximise_precision(float_status *st)
{
    FloatX80RoundPrec old = get_floatx80_rounding_precision(st);
    set_floatx80_rounding_precision(floatx80_precision_x, st);
    return old;
}
312
/*
 * FILD m32int: push a signed 32-bit integer onto the x87 stack.
 * Integer loads are exact in extended precision, so the precision
 * control setting is bypassed for the conversion.
 */
void helper_fildl_ST0(CPUX86State *env, int32_t val)
{
    int new_fpstt;
    FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */

    set_floatx80_rounding_precision(old, &env->fp_status);
}

/* FILD m64int: push a signed 64-bit integer onto the x87 stack. */
void helper_fildll_ST0(CPUX86State *env, int64_t val)
{
    int new_fpstt;
    FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */

    set_floatx80_rounding_precision(old, &env->fp_status);
}
338
/* FST m32fp: convert ST0 to a 32-bit float and return its raw bits. */
uint32_t helper_fsts_ST0(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.f = floatx80_to_float32(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}

/* FST m64fp: convert ST0 to a 64-bit double and return its raw bits. */
uint64_t helper_fstl_ST0(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.f = floatx80_to_float64(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}
364
/*
 * FIST m16int: convert ST0 to int using the current rounding mode.
 * Values that do not fit in 16 bits produce the "integer indefinite"
 * value -32768 and raise Invalid (replacing any flags from the
 * conversion itself).
 */
int32_t helper_fist_ST0(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/*
 * FIST m32int: out-of-range values (softfloat reports Invalid) produce
 * the 32-bit integer indefinite 0x80000000.
 */
int32_t helper_fistl_ST0(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/*
 * FISTP m64int: out-of-range values produce the 64-bit integer
 * indefinite 0x8000000000000000.
 */
int64_t helper_fistll_ST0(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}
404
/*
 * FISTTP m16int (SSE3): like FIST but always truncates (round toward
 * zero), ignoring the current rounding mode.  Out-of-range values give
 * the 16-bit integer indefinite -32768 and raise Invalid.
 */
int32_t helper_fistt_ST0(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTTP m32int: truncating convert; indefinite is 0x80000000. */
int32_t helper_fisttl_ST0(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTTP m64int: truncating convert; indefinite is 0x8000000000000000. */
int64_t helper_fisttll_ST0(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}
444
/*
 * FLD m80fp: push a 10-byte extended-precision value from guest memory.
 * The access is validated up front so a fault leaves the stack unchanged.
 */
void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
    int new_fpstt;
    X86Access ac;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = do_fldt(&ac, ptr);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FSTP m80fp: store ST0 as a 10-byte extended-precision value. */
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
    do_fstt(&ac, ptr, ST0);
}
465
/* TCG-callable wrapper around fpush(). */
void helper_fpush(CPUX86State *env)
{
    fpush(env);
}

/* TCG-callable wrapper around fpop(). */
void helper_fpop(CPUX86State *env)
{
    fpop(env);
}

/* FDECSTP: decrement TOP without touching tags; clears C0..C3. */
void helper_fdecstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fpus &= ~0x4700; /* clear condition-code bits */
}

/* FINCSTP: increment TOP without touching tags; clears C0..C3. */
void helper_fincstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt + 1) & 7;
    env->fpus &= ~0x4700; /* clear condition-code bits */
}
487
488 /* FPU move */
489
/* FFREE ST(i): mark the register's tag as empty (contents untouched). */
void helper_ffree_STN(CPUX86State *env, int st_index)
{
    env->fptags[(env->fpstt + st_index) & 7] = 1;
}

/* Copy the FT0 scratch register into ST0. */
void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}

/* Copy ST(i) into the FT0 scratch register. */
void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}

/* Copy ST(i) into ST0. */
void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}

/* Copy ST0 into ST(i). */
void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}

/* FXCH: exchange ST0 with ST(i). */
void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 tmp;

    tmp = ST(st_index);
    ST(st_index) = ST0;
    ST0 = tmp;
}
523
524 /* FPU operations */
525
/*
 * Status-word condition bits indexed by FloatRelation + 1:
 * less -> C0, equal -> C3, greater -> none, unordered -> C3|C2|C0.
 */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};

/* FCOM: signaling compare of ST0 with FT0, result in C0/C2/C3. */
void helper_fcom_ST0_FT0(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

/* FUCOM: quiet compare (no Invalid on QNaN operands). */
void helper_fucom_ST0_FT0(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}
547
/*
 * EFLAGS bits indexed by FloatRelation + 1:
 * less -> CF, equal -> ZF, greater -> none, unordered -> ZF|PF|CF.
 */
static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};

/* FCOMI: signaling compare of ST0 with FT0, result in EFLAGS. */
void helper_fcomi_ST0_FT0(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C);
    CC_SRC = eflags | fcomi_ccval[ret + 1];
    CC_OP = CC_OP_EFLAGS;
    merge_exception_flags(env, old_flags);
}

/* FUCOMI: quiet compare of ST0 with FT0, result in EFLAGS. */
void helper_fucomi_ST0_FT0(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C);
    CC_SRC = eflags | fcomi_ccval[ret + 1];
    CC_OP = CC_OP_EFLAGS;
    merge_exception_flags(env, old_flags);
}
575
/* FADD: ST0 += FT0, with exception flags merged into FPUS. */
void helper_fadd_ST0_FT0(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FMUL: ST0 *= FT0. */
void helper_fmul_ST0_FT0(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FSUB: ST0 = ST0 - FT0. */
void helper_fsub_ST0_FT0(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FSUBR: ST0 = FT0 - ST0 (reversed operands). */
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FDIV: ST0 = ST0 / FT0. */
void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}

/* FDIVR: ST0 = FT0 / ST0 (reversed operands). */
void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}
613
614 /* fp operations between STN and ST0 */
615
/* FADD ST(i), ST0: ST(i) += ST0. */
void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    int old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FMUL ST(i), ST0: ST(i) *= ST0. */
void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    int old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FSUB ST(i), ST0: ST(i) = ST(i) - ST0. */
void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    int old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FSUBR ST(i), ST0: ST(i) = ST0 - ST(i). */
void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    int old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FDIV ST(i), ST0: ST(i) = ST(i) / ST0. */
void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, *p, ST0);
}

/* FDIVR ST(i), ST0: ST(i) = ST0 / ST(i). */
void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, ST0, *p);
}
659
660 /* misc FPU operations */
helper_fchs_ST0(CPUX86State * env)661 void helper_fchs_ST0(CPUX86State *env)
662 {
663 ST0 = floatx80_chs(ST0);
664 }
665
helper_fabs_ST0(CPUX86State * env)666 void helper_fabs_ST0(CPUX86State *env)
667 {
668 ST0 = floatx80_abs(ST0);
669 }
670
helper_fld1_ST0(CPUX86State * env)671 void helper_fld1_ST0(CPUX86State *env)
672 {
673 ST0 = floatx80_one;
674 }
675
/*
 * FLDL2T: load log2(10).  The stored constant is the round-down value;
 * a one-ulp-larger value is used when the rounding mode is round-up,
 * so the loaded constant is correctly rounded for the current mode.
 */
void helper_fldl2t_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_UP:
        ST0 = floatx80_l2t_u;
        break;
    default:
        ST0 = floatx80_l2t;
        break;
    }
}

/*
 * FLDL2E: load log2(e).  Here the default constant is the round-up
 * value; the one-ulp-smaller value is used for round-down/chop.
 */
void helper_fldl2e_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_l2e_d;
        break;
    default:
        ST0 = floatx80_l2e;
        break;
    }
}

/* FLDPI: load pi, rounded according to the current rounding mode. */
void helper_fldpi_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_pi_d;
        break;
    default:
        ST0 = floatx80_pi;
        break;
    }
}

/* FLDLG2: load log10(2), rounded according to the current mode. */
void helper_fldlg2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_lg2_d;
        break;
    default:
        ST0 = floatx80_lg2;
        break;
    }
}

/* FLDLN2: load ln(2), rounded according to the current mode. */
void helper_fldln2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_ln2_d;
        break;
    default:
        ST0 = floatx80_ln2;
        break;
    }
}

/* FLDZ: overwrite ST0 with +0.0. */
void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}

/* Load +0.0 into the FT0 scratch register. */
void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}
749
/* FNSTSW: status word with the live TOP value in bits 13:11. */
uint32_t helper_fnstsw(CPUX86State *env)
{
    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
}

/* FNSTCW: return the control word. */
uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}
759
/*
 * Map the 2-bit x86 rounding-control field (FPUC.RC / MXCSR.RC) onto
 * the corresponding softfloat rounding mode and install it in *status.
 * mode must be the raw 2-bit RC encoding (0..3).
 */
static void set_x86_rounding_mode(unsigned mode, float_status *status)
{
    /*
     * Indexed by RC encoding: 0 = nearest-even, 1 = down, 2 = up,
     * 3 = truncate (chop).  const: this table is read-only data.
     */
    static const FloatRoundMode x86_round_mode[4] = {
        float_round_nearest_even,
        float_round_down,
        float_round_up,
        float_round_to_zero
    };
    assert(mode < ARRAY_SIZE(x86_round_mode));
    set_float_rounding_mode(x86_round_mode[mode], status);
}
771
/*
 * Propagate the guest-visible control word (FPUC) into env->fp_status:
 * rounding mode from the RC field and x87 rounding precision from the
 * PC field.  Called whenever FPUC changes (see cpu_set_fpuc()).
 */
void update_fp_status(CPUX86State *env)
{
    int rnd_mode;
    FloatX80RoundPrec rnd_prec;

    /* set rounding mode */
    rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT;
    set_x86_rounding_mode(rnd_mode, &env->fp_status);

    /*
     * Precision control, FPUC bits 9:8: 0 = single, 2 = double,
     * 3 = extended; encoding 1 falls through to extended here
     * (presumably the reserved encoding -- confirm against the SDM).
     */
    switch ((env->fpuc >> 8) & 3) {
    case 0:
        rnd_prec = floatx80_precision_s;
        break;
    case 2:
        rnd_prec = floatx80_precision_d;
        break;
    case 3:
    default:
        rnd_prec = floatx80_precision_x;
        break;
    }
    set_floatx80_rounding_precision(rnd_prec, &env->fp_status);
}
795
/* FLDCW: load the control word (also refreshes fp_status). */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}

/* FNCLEX: clear exception flags, ES and B; keep C0-C3 and TOP. */
void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}

/* FWAIT: deliver any pending unmasked FP exception. */
void helper_fwait(CPUX86State *env)
{
    if (env->fpus & FPUS_SE) {
        fpu_raise_exception(env, GETPC());
    }
}
812
/*
 * Reset the x87 state to its FNINIT-defined values: status word and
 * stack pointer cleared, instruction/data pointers zeroed, default
 * control word 0x37f installed, and every stack slot tagged empty.
 */
static void do_fninit(CPUX86State *env)
{
    size_t slot;

    env->fpus = 0;
    env->fpstt = 0;
    env->fpcs = 0;
    env->fpds = 0;
    env->fpip = 0;
    env->fpdp = 0;
    cpu_set_fpuc(env, 0x37f);
    for (slot = 0; slot < 8; slot++) {
        env->fptags[slot] = 1; /* mark every register as empty */
    }
}
831
/* FNINIT: TCG-callable wrapper around do_fninit(). */
void helper_fninit(CPUX86State *env)
{
    do_fninit(env);
}
836
837 /* BCD ops */
838
/*
 * FBLD: load an 80-bit packed-BCD integer.  Bytes 0..8 hold 18 BCD
 * digits (two per byte, least significant first); bit 7 of byte 9 is
 * the sign.  18 decimal digits fit in an int64_t, so the magnitude is
 * accumulated in a uint64_t and converted exactly.
 */
void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;
    floatx80 tmp;
    uint64_t val;
    unsigned int v;
    int i;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());

    /* Accumulate digits from most significant byte downwards. */
    val = 0;
    for (i = 8; i >= 0; i--) {
        v = access_ldb(&ac, ptr + i);
        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    }
    tmp = int64_to_floatx80(val, &env->fp_status);
    if (access_ldb(&ac, ptr + 9) & 0x80) {
        tmp = floatx80_chs(tmp);
    }
    fpush(env);
    ST0 = tmp;
}
861
/*
 * FBSTP: store ST0 as an 80-bit packed-BCD integer (layout as in
 * helper_fbld_ST0 above).  Values outside the 18-digit range store the
 * packed-BCD indefinite encoding (0xffffc000...00) and raise Invalid.
 */
void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
    int old_flags = save_exception_flags(env);
    int v;
    target_ulong mem_ref, mem_end;
    int64_t val;
    CPU_LDoubleU temp;
    X86Access ac;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
    temp.d = ST0;

    val = floatx80_to_int64(ST0, &env->fp_status);
    mem_ref = ptr;
    if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
        /* Out of range for 18 BCD digits: store the indefinite value. */
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        while (mem_ref < ptr + 7) {
            access_stb(&ac, mem_ref++, 0);
        }
        access_stb(&ac, mem_ref++, 0xc0);
        access_stb(&ac, mem_ref++, 0xff);
        access_stb(&ac, mem_ref++, 0xff);
        merge_exception_flags(env, old_flags);
        return;
    }
    mem_end = mem_ref + 9;
    /* Sign byte comes from the original ST0, so -0 keeps its sign. */
    if (SIGND(temp)) {
        access_stb(&ac, mem_end, 0x80);
        val = -val;
    } else {
        access_stb(&ac, mem_end, 0x00);
    }
    /* Emit two BCD digits per byte, least significant first. */
    while (mem_ref < mem_end) {
        if (val == 0) {
            break;
        }
        v = val % 100;
        val = val / 100;
        v = ((v / 10) << 4) | (v % 10);
        access_stb(&ac, mem_ref++, v);
    }
    /* Zero-fill the remaining digit bytes. */
    while (mem_ref < mem_end) {
        access_stb(&ac, mem_ref++, 0);
    }
    merge_exception_flags(env, old_flags);
}
908
909 /* 128-bit significand of log(2). */
910 #define ln2_sig_high 0xb17217f7d1cf79abULL
911 #define ln2_sig_low 0xc9e3b39803f2f6afULL
912
913 /*
914 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
915 * the interval [-1/64, 1/64].
916 */
917 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
918 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
919 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
920 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
921 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
922 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
923 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
924 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
925 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
926
/* Table entry for the F2XM1 polynomial approximation (see f2xm1_table). */
struct f2xm1_data {
    /*
     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
     * are very close to exact floatx80 values.
     */
    floatx80 t;
    /* The value of 2^t. */
    floatx80 exp2;
    /* The value of 2^t - 1. */
    floatx80 exp2m1;
};
938
939 static const struct f2xm1_data f2xm1_table[65] = {
940 { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
941 make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
942 make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
943 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
944 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
945 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
946 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
947 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
948 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
949 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
950 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
951 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
952 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
953 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
954 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
955 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
956 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
957 make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
958 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
959 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
960 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
961 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
962 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
963 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
964 { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
965 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
966 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
967 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
968 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
969 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
970 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
971 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
972 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
973 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
974 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
975 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
976 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
977 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
978 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
979 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
980 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
981 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
982 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
983 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
984 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
985 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
986 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
987 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
988 { make_floatx80_init(0xbffe, 0x800000000000227dULL),
989 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
990 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
991 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
992 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
993 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
994 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
995 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
996 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
997 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
998 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
999 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
1000 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
1001 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
1002 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
1003 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
1004 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
1005 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
1006 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
1007 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
1008 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
1009 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
1010 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
1011 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
1012 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
1013 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
1014 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
1015 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
1016 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
1017 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
1018 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
1019 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
1020 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
1021 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
1022 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
1023 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
1024 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
1025 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
1026 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
1027 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
1028 make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
1029 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
1030 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
1031 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
1032 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
1033 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
1034 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
1035 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
1036 { floatx80_zero_init,
1037 make_floatx80_init(0x3fff, 0x8000000000000000ULL),
1038 floatx80_zero_init },
1039 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
1040 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
1041 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
1042 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
1043 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
1044 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
1045 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
1046 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
1047 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
1048 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
1049 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
1050 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
1051 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
1052 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
1053 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
1054 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
1055 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
1056 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
1057 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
1058 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
1059 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
1060 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
1061 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
1062 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
1063 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
1064 make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
1065 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
1066 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
1067 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
1068 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
1069 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
1070 make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
1071 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
1072 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
1073 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
1074 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
1075 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
1076 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
1077 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
1078 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
1079 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
1080 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
1081 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
1082 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
1083 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
1084 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
1085 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
1086 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
1087 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
1088 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
1089 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
1090 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
1091 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
1092 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
1093 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
1094 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
1095 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
1096 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
1097 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
1098 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
1099 { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
1100 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
1101 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
1102 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
1103 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
1104 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
1105 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
1106 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
1107 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
1108 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
1109 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
1110 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
1111 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
1112 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
1113 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
1114 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
1115 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
1116 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
1117 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
1118 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
1119 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
1120 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
1121 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
1122 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
1123 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
1124 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
1125 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
1126 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
1127 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
1128 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
1129 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
1130 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
1131 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
1132 { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
1133 make_floatx80_init(0x4000, 0x8000000000000000ULL),
1134 make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
1135 };
1136
/*
 * F2XM1: replace ST0 with 2^ST0 - 1.
 *
 * Arguments with |ST0| > 1 are treated as invalid here (see the
 * explicit range check below).  For small arguments the result is
 * approximated as ST0 * log(2); otherwise the argument is reduced to
 * a nearest multiple of 1/32 (via f2xm1_table) plus a remainder y,
 * and a polynomial in y supplies the low-order part of the result.
 * All intermediate exception flags are folded back into the saved
 * flags by merge_exception_flags() on exit.
 */
void helper_f2xm1(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    uint64_t sig = extractFloatx80Frac(ST0);
    int32_t exp = extractFloatx80Exp(ST0);
    bool sign = extractFloatx80Sign(ST0);

    if (floatx80_invalid_encoding(ST0, &env->fp_status)) {
        /* Unnormal/pseudo encodings: raise invalid, return default NaN. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        /* Quiet NaNs pass through; signaling NaNs are silenced. */
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (exp > 0x3fff ||
               (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
        /* Out of range for the instruction, treat as invalid. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (exp == 0x3fff) {
        /* Argument 1 or -1, exact result 1 or -0.5. */
        if (sign) {
            /* 2^-1 - 1 = -0.5; for +1 the input value is already the
             * correct result (2^1 - 1 = 1), so ST0 is left unchanged. */
            ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
        }
    } else if (exp < 0x3fb0) {
        if (!floatx80_is_zero(ST0)) {
            /*
             * Multiplying the argument by an extra-precision version
             * of log(2) is sufficiently precise.  Zero arguments are
             * returned unchanged.
             */
            uint64_t sig0, sig1, sig2;
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
                            &sig2);
            /* This result is inexact. */
            sig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig0, sig1,
                                                &env->fp_status);
        }
    } else {
        floatx80 tmp, y, accum;
        bool asign, bsign;
        int32_t n, aexp, bexp;
        uint64_t asig0, asig1, asig2, bsig0, bsig1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        /* The internal computation needs round-to-nearest, full
         * extended precision; caller-visible settings are restored
         * before packing the final result. */
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        /* Find the nearest multiple of 1/32 to the argument. */
        tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
        n = 32 + floatx80_to_int32(tmp, &env->fp_status);
        y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);

        if (floatx80_is_zero(y)) {
            /*
             * Use the value of 2^t - 1 from the table, to avoid
             * needing to special-case zero as a result of
             * multiplication below.
             */
            ST0 = f2xm1_table[n].t;
            set_float_exception_flags(float_flag_inexact, &env->fp_status);
            env->fp_status.float_rounding_mode = save_mode;
        } else {
            /*
             * Compute the lower parts of a polynomial expansion for
             * (2^y - 1) / y.
             */
            accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);

            /*
             * The full polynomial expansion is f2xm1_coeff_0 + accum
             * (where accum has much lower magnitude, and so, in
             * particular, carry out of the addition is not possible).
             * (This expansion is only accurate to about 70 bits, not
             * 128 bits.)
             */
            aexp = extractFloatx80Exp(f2xm1_coeff_0);
            asign = extractFloatx80Sign(f2xm1_coeff_0);
            shift128RightJamming(extractFloatx80Frac(accum), 0,
                                 aexp - extractFloatx80Exp(accum),
                                 &asig0, &asig1);
            bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
            bsig1 = 0;
            if (asign == extractFloatx80Sign(accum)) {
                add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            } else {
                sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            }
            /* And thus compute an approximation to 2^y - 1. */
            mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
                            &asig0, &asig1, &asig2);
            aexp += extractFloatx80Exp(y) - 0x3ffe;
            asign ^= extractFloatx80Sign(y);
            if (n != 32) {
                /*
                 * Multiply this by the precomputed value of 2^t and
                 * add that of 2^t - 1.
                 */
                mul128By64To192(asig0, asig1,
                                extractFloatx80Frac(f2xm1_table[n].exp2),
                                &asig0, &asig1, &asig2);
                aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
                bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
                bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
                bsig1 = 0;
                /* Align the two addends to a common exponent. */
                if (bexp < aexp) {
                    shift128RightJamming(bsig0, bsig1, aexp - bexp,
                                         &bsig0, &bsig1);
                } else if (aexp < bexp) {
                    shift128RightJamming(asig0, asig1, bexp - aexp,
                                         &asig0, &asig1);
                    aexp = bexp;
                }
                /* The sign of 2^t - 1 is always that of the result. */
                bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
                if (asign == bsign) {
                    /* Avoid possible carry out of the addition. */
                    shift128RightJamming(asig0, asig1, 1,
                                         &asig0, &asig1);
                    shift128RightJamming(bsig0, bsig1, 1,
                                         &bsig0, &bsig1);
                    ++aexp;
                    add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
                } else {
                    sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
                    asign = bsign;
                }
            }
            env->fp_status.float_rounding_mode = save_mode;
            /* This result is inexact. */
            asig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                asign, aexp, asig0, asig1,
                                                &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    merge_exception_flags(env, old_flags);
}
1297
/*
 * FPTAN: compute tan(ST0) via the host's double-precision tan(),
 * push 1.0, and clear C2; when the argument's magnitude exceeds
 * MAXTAN, leave the stack alone and set C2 instead.
 */
void helper_fptan(CPUX86State *env)
{
    double arg = floatx80_to_double(env, ST0);

    if (arg > MAXTAN || arg < -MAXTAN) {
        /* Argument out of range: report via C2, leave ST0 untouched. */
        env->fpus |= 0x400;
        return;
    }

    ST0 = double_to_floatx80(env, tan(arg));
    fpush(env);
    ST0 = floatx80_one;
    env->fpus &= ~0x400; /* C2 <-- 0 */
    /* the above code is for |arg| < 2**52 only */
}
1313
/* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. */
/* Each constant is an x87-style exponent plus a 128-bit significand
 * split into high/low 64-bit halves. */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL

/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)

/* One reduction-table entry for helper_fpatan(). */
struct fpatan_data {
    /* High and low parts of atan(x). */
    floatx80 atan_high, atan_low;
};
1348
/*
 * Argument-reduction table: entry n holds the high and low parts of
 * atan(n/8), for n = 0..8 (helper_fpatan splits its reduced argument
 * x as t + y with t = n/8 the nearest multiple of 1/8 to x; entry 8
 * is atan(1) = pi/4).
 */
static const struct fpatan_data fpatan_table[9] = {
    { floatx80_zero_init,
      floatx80_zero_init },
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};
1369
/*
 * FPATAN: compute arctan(ST1 / ST0), using the signs of both operands
 * to select the quadrant (adding or subtracting pi or pi/2 as needed),
 * store the result in ST1, and pop the stack.  NaN, zero, infinity
 * and invalid-encoding operands are handled up front; the general
 * finite case reduces the quotient against fpatan_table and uses the
 * fpatan_coeff_* polynomial for the residual.
 */
void helper_fpatan(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0, &env->fp_status) ||
               floatx80_invalid_encoding(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
        /* Pass this zero through. */
    } else if (((floatx80_is_infinity(ST0, &env->fp_status) &&
                 !floatx80_is_infinity(ST1, &env->fp_status)) ||
                arg0_exp - arg1_exp >= 80) &&
               !arg0_sign) {
        /*
         * Dividing ST1 by ST0 gives the correct result up to
         * rounding, and avoids spurious underflow exceptions that
         * might result from passing some small values through the
         * polynomial approximation, but if a finite nonzero result of
         * division is exact, the result of fpatan is still inexact
         * (and underflowing where appropriate).
         */
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
        if (!floatx80_is_zero(ST1) &&
            !(get_float_exception_flags(&env->fp_status) &
              float_flag_inexact)) {
            /*
             * The mathematical result is very slightly closer to zero
             * than this exact result.  Round a value with the
             * significand adjusted accordingly to get the correct
             * exceptions, and possibly an adjusted result depending
             * on the rounding mode.
             */
            uint64_t sig = extractFloatx80Frac(ST1);
            int32_t exp = extractFloatx80Exp(ST1);
            bool sign = extractFloatx80Sign(ST1);
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig - 1,
                                                -1, &env->fp_status);
        }
    } else {
        /* The result is inexact. */
        bool rsign = arg1_sign;
        int32_t rexp;
        uint64_t rsig0, rsig1;
        if (floatx80_is_zero(ST1)) {
            /*
             * ST0 is negative.  The result is pi with the sign of
             * ST1.
             */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else if (floatx80_is_infinity(ST1, &env->fp_status)) {
            if (floatx80_is_infinity(ST0, &env->fp_status)) {
                if (arg0_sign) {
                    rexp = pi_34_exp;
                    rsig0 = pi_34_sig_high;
                    rsig1 = pi_34_sig_low;
                } else {
                    rexp = pi_4_exp;
                    rsig0 = pi_4_sig_high;
                    rsig1 = pi_4_sig_low;
                }
            } else {
                rexp = pi_2_exp;
                rsig0 = pi_2_sig_high;
                rsig1 = pi_2_sig_low;
            }
        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
            rexp = pi_2_exp;
            rsig0 = pi_2_sig_high;
            rsig1 = pi_2_sig_low;
        } else if (floatx80_is_infinity(ST0, &env->fp_status) ||
                   arg0_exp - arg1_exp >= 80) {
            /* ST0 is negative. */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else {
            /*
             * ST0 and ST1 are finite, nonzero and with exponents not
             * too far apart.
             */
            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
            int32_t azexp, axexp;
            bool adj_sub, ysign, zsign;
            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
            uint64_t azsig0, azsig1;
            uint64_t azsig2, azsig3, axsig0, axsig1;
            floatx80 x8;
            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
            FloatX80RoundPrec save_prec =
                env->fp_status.floatx80_rounding_precision;
            /* Internal computation uses round-to-nearest, extended
             * precision; the caller's settings are restored below. */
            env->fp_status.float_rounding_mode = float_round_nearest_even;
            env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

            if (arg0_exp == 0) {
                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
            }
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            /* Arrange for num/den <= 1 by dividing the smaller
             * magnitude by the larger, and record the pi or pi/2
             * adjustment that maps the partial result back to the
             * requested angle. */
            if (arg0_exp > arg1_exp ||
                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
                /* Work with abs(ST1) / abs(ST0). */
                num_exp = arg1_exp;
                num_sig = arg1_sig;
                den_exp = arg0_exp;
                den_sig = arg0_sig;
                if (arg0_sign) {
                    /* The result is subtracted from pi. */
                    adj_exp = pi_exp;
                    adj_sig0 = pi_sig_high;
                    adj_sig1 = pi_sig_low;
                    adj_sub = true;
                } else {
                    /* The result is used as-is. */
                    adj_exp = 0;
                    adj_sig0 = 0;
                    adj_sig1 = 0;
                    adj_sub = false;
                }
            } else {
                /* Work with abs(ST0) / abs(ST1). */
                num_exp = arg0_exp;
                num_sig = arg0_sig;
                den_exp = arg1_exp;
                den_sig = arg1_sig;
                /* The result is added to or subtracted from pi/2. */
                adj_exp = pi_2_exp;
                adj_sig0 = pi_2_sig_high;
                adj_sig1 = pi_2_sig_low;
                adj_sub = !arg0_sign;
            }

            /*
             * Compute x = num/den, where 0 < x <= 1 and x is not too
             * small.
             */
            xexp = num_exp - den_exp + 0x3ffe;
            remsig0 = num_sig;
            remsig1 = 0;
            if (den_sig <= remsig0) {
                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                ++xexp;
            }
            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
            mul64To128(den_sig, xsig0, &msig0, &msig1);
            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
            /* Correct any over-estimate in the quotient digit. */
            while ((int64_t) remsig0 < 0) {
                --xsig0;
                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
            }
            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
            /*
             * No need to correct any estimation error in xsig1; even
             * with such error, it is accurate enough.
             */

            /*
             * Split x as x = t + y, where t = n/8 is the nearest
             * multiple of 1/8 to x.
             */
            x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                               false, xexp + 3, xsig0,
                                               xsig1, &env->fp_status);
            n = floatx80_to_int32(x8, &env->fp_status);
            if (n == 0) {
                ysign = false;
                yexp = xexp;
                ysig0 = xsig0;
                ysig1 = xsig1;
                texp = 0;
                tsig = 0;
            } else {
                int shift = clz32(n) + 32;
                texp = 0x403b - shift;
                tsig = n;
                tsig <<= shift;
                if (texp == xexp) {
                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
                    if ((int64_t) ysig0 >= 0) {
                        ysign = false;
                        if (ysig0 == 0) {
                            if (ysig1 == 0) {
                                yexp = 0;
                            } else {
                                shift = clz64(ysig1) + 64;
                                yexp = xexp - shift;
                                shift128Left(ysig0, ysig1, shift,
                                             &ysig0, &ysig1);
                            }
                        } else {
                            shift = clz64(ysig0);
                            yexp = xexp - shift;
                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                        }
                    } else {
                        ysign = true;
                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
                        if (ysig0 == 0) {
                            shift = clz64(ysig1) + 64;
                        } else {
                            shift = clz64(ysig0);
                        }
                        yexp = xexp - shift;
                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                    }
                } else {
                    /*
                     * t's exponent must be greater than x's because t
                     * is positive and the nearest multiple of 1/8 to
                     * x, and if x has a greater exponent, the power
                     * of 2 with that exponent is also a multiple of
                     * 1/8.
                     */
                    uint64_t usig0, usig1;
                    shift128RightJamming(xsig0, xsig1, texp - xexp,
                                         &usig0, &usig1);
                    ysign = true;
                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
                    if (ysig0 == 0) {
                        shift = clz64(ysig1) + 64;
                    } else {
                        shift = clz64(ysig0);
                    }
                    yexp = texp - shift;
                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                }
            }

            /*
             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
             * arctan(z).
             */
            zsign = ysign;
            if (texp == 0 || yexp == 0) {
                zexp = yexp;
                zsig0 = ysig0;
                zsig1 = ysig1;
            } else {
                /*
                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
                 */
                int32_t dexp = texp + xexp - 0x3ffe;
                uint64_t dsig0, dsig1, dsig2;
                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
                /*
                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
                 * bit).  Add 1 to produce the denominator 1+tx.
                 */
                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
                                     &dsig0, &dsig1);
                dsig0 |= 0x8000000000000000ULL;
                zexp = yexp - 1;
                remsig0 = ysig0;
                remsig1 = ysig1;
                remsig2 = 0;
                if (dsig0 <= remsig0) {
                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                    ++zexp;
                }
                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
                       &remsig0, &remsig1, &remsig2);
                /* Correct any over-estimate in the quotient digit. */
                while ((int64_t) remsig0 < 0) {
                    --zsig0;
                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
                           &remsig0, &remsig1, &remsig2);
                }
                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
                /* No need to correct any estimation error in zsig1. */
            }

            /* Compute arctan(z) via the odd-power polynomial. */
            if (zexp == 0) {
                azexp = 0;
                azsig0 = 0;
                azsig1 = 0;
            } else {
                floatx80 z2, accum;
                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
                /* Compute z^2. */
                mul128To256(zsig0, zsig1, zsig0, zsig1,
                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
                z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                                   zexp + zexp - 0x3ffe,
                                                   z2sig0, z2sig1,
                                                   &env->fp_status);

                /* Compute the lower parts of the polynomial expansion. */
                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);

                /*
                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
                 */
                aexp = extractFloatx80Exp(fpatan_coeff_0);
                shift128RightJamming(extractFloatx80Frac(accum), 0,
                                     aexp - extractFloatx80Exp(accum),
                                     &asig0, &asig1);
                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
                       &asig0, &asig1);
                /* Multiply by z to compute arctan(z). */
                azexp = aexp + zexp - 0x3ffe;
                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
                            &azsig2, &azsig3);
            }

            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */
            if (texp == 0) {
                /* z is positive. */
                axexp = azexp;
                axsig0 = azsig0;
                axsig1 = azsig1;
            } else {
                /* arctan(t) = fpatan_table[n].atan_high + atan_low. */
                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
                uint64_t low_sig0 =
                    extractFloatx80Frac(fpatan_table[n].atan_low);
                uint64_t low_sig1 = 0;
                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
                axsig1 = 0;
                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
                                     &low_sig0, &low_sig1);
                if (low_sign) {
                    sub128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                }
                /* Align exponents (shifting one bit extra so the
                 * following add cannot carry out). */
                if (azexp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
                                         &axsig0, &axsig1);
                    axexp = azexp + 1;
                    shift128RightJamming(azsig0, azsig1, 1,
                                         &azsig0, &azsig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
                                         &azsig0, &azsig1);
                    ++axexp;
                }
                if (zsign) {
                    sub128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                }
            }

            if (adj_exp == 0) {
                rexp = axexp;
                rsig0 = axsig0;
                rsig1 = axsig1;
            } else {
                /*
                 * Add or subtract arctan(x) (exponent axexp,
                 * significand axsig0 and axsig1, positive, not
                 * necessarily normalized) to the number given by
                 * adj_exp, adj_sig0 and adj_sig1, according to
                 * adj_sub.
                 */
                if (adj_exp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
                                         &axsig0, &axsig1);
                    rexp = adj_exp + 1;
                    shift128RightJamming(adj_sig0, adj_sig1, 1,
                                         &adj_sig0, &adj_sig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(adj_sig0, adj_sig1,
                                         axexp - adj_exp + 1,
                                         &adj_sig0, &adj_sig1);
                    rexp = axexp + 1;
                }
                if (adj_sub) {
                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                } else {
                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                }
            }

            env->fp_status.float_rounding_mode = save_mode;
            env->fp_status.floatx80_rounding_precision = save_prec;
        }
        /* This result is inexact. */
        rsig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
                                            rsig0, rsig1, &env->fp_status);
    }

    fpop(env);
    merge_exception_flags(env, old_flags);
}
1808
/*
 * FXTRACT: split ST0 into exponent and significand.  On exit ST1
 * holds the unbiased exponent (as a floatx80) and ST0 holds the
 * significand with its exponent rebiased to EXPBIAS, so that it lies
 * in [1, 2).  Zero, invalid encodings, NaNs and infinities each get
 * their own handling below.
 */
void helper_fxtract(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    CPU_LDoubleU temp;

    temp.d = ST0;

    if (floatx80_is_zero(ST0)) {
        /* Easy way to generate -inf and raising division by 0 exception */
        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
                           &env->fp_status);
        fpush(env);
        ST0 = temp.d;
    } else if (floatx80_invalid_encoding(ST0, &env->fp_status)) {
        /* Invalid encoding: raise invalid, push default NaN twice
         * (ST1 here is the NaN stored above before the push). */
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_any_nan(ST0)) {
        /* NaN input: silence if signaling, then duplicate across
         * both result slots. */
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_infinity(ST0, &env->fp_status)) {
        /* Infinity: significand slot keeps the infinity, exponent
         * slot becomes +inf. */
        fpush(env);
        ST0 = ST1;
        ST1 = floatx80_default_inf(0, &env->fp_status);
    } else {
        int expdif;

        if (EXPD(temp) == 0) {
            /* Pseudo-denormal / denormal: normalize the significand
             * by hand and account for the shift in the exponent. */
            int shift = clz64(temp.l.lower);
            temp.l.lower <<= shift;
            expdif = 1 - EXPBIAS - shift;
            float_raise(float_flag_input_denormal_flushed, &env->fp_status);
        } else {
            expdif = EXPD(temp) - EXPBIAS;
        }
        /* DP exponent bias */
        ST0 = int32_to_floatx80(expdif, &env->fp_status);
        fpush(env);
        BIASEXPONENT(temp);
        ST0 = temp.d;
    }
    merge_exception_flags(env, old_flags);
}
1857
/*
 * Common implementation of FPREM (mod=true, truncating quotient) and
 * FPREM1 (mod=false, round-to-nearest quotient).  Computes ST0 = ST0
 * rem ST1.  When the exponent difference is >= 64 only a partial
 * remainder is produced and C2 is set so the guest loops; otherwise the
 * low three quotient bits are reported in C0/C3/C1 per the ISA.
 */
static void helper_fprem_common(CPUX86State *env, bool mod)
{
    int old_flags = save_exception_flags(env);
    uint64_t quotient;
    CPU_LDoubleU temp0, temp1;
    int exp0, exp1, expdiff;

    temp0.d = ST0;
    temp1.d = ST1;
    exp0 = EXPD(temp0);
    exp1 = EXPD(temp1);

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
        exp0 == 0x7fff || exp1 == 0x7fff ||
        floatx80_invalid_encoding(ST0, &env->fp_status) ||
        floatx80_invalid_encoding(ST1, &env->fp_status)) {
        /* Special operands: let the softfloat routine handle them. */
        ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
    } else {
        if (exp0 == 0) {
            /* Denormal: compute the effective (normalized) exponent. */
            exp0 = 1 - clz64(temp0.l.lower);
        }
        if (exp1 == 0) {
            exp1 = 1 - clz64(temp1.l.lower);
        }
        expdiff = exp0 - exp1;
        if (expdiff < 64) {
            /* Full remainder in one step; expose quotient bits. */
            ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
            env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */
            env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
            env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */
        } else {
            /*
             * Partial remainder.  This choice of how many bits to
             * process at once is specified in AMD instruction set
             * manuals, and empirically is followed by Intel
             * processors as well; it ensures that the final remainder
             * operation in a loop does produce the correct low three
             * bits of the quotient.  AMD manuals specify that the
             * flags other than C2 are cleared, and empirically Intel
             * processors clear them as well.
             */
            int n = 32 + (expdiff % 32);
            temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
            ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
            env->fpus |= 0x400; /* C2 <-- 1 */
        }
    }
    merge_exception_flags(env, old_flags);
}
1908
/* FPREM1: IEEE remainder (quotient rounded to nearest). */
void helper_fprem1(CPUX86State *env)
{
    helper_fprem_common(env, false);
}
1913
/* FPREM: legacy 8087 remainder (quotient truncated toward zero). */
void helper_fprem(CPUX86State *env)
{
    helper_fprem_common(env, true);
}
1918
/* 128-bit significand of log2(e). */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 *
 * fyl2x_coeff_0_low is the low-order tail of fyl2x_coeff_0, giving the
 * leading coefficient extra precision (see helper_fyl2x_common).
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
1940
1941 /*
1942 * Compute an approximation of log2(1+arg), where 1+arg is in the
1943 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this
1944 * function is called, rounding precision is set to 80 and the
1945 * round-to-nearest mode is in effect. arg must not be exactly zero,
1946 * and must not be so close to zero that underflow might occur.
1947 */
helper_fyl2x_common(CPUX86State * env,floatx80 arg,int32_t * exp,uint64_t * sig0,uint64_t * sig1)1948 static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
1949 uint64_t *sig0, uint64_t *sig1)
1950 {
1951 uint64_t arg0_sig = extractFloatx80Frac(arg);
1952 int32_t arg0_exp = extractFloatx80Exp(arg);
1953 bool arg0_sign = extractFloatx80Sign(arg);
1954 bool asign;
1955 int32_t dexp, texp, aexp;
1956 uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
1957 uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
1958 uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
1959 floatx80 t2, accum;
1960
1961 /*
1962 * Compute an approximation of arg/(2+arg), with extra precision,
1963 * as the argument to a polynomial approximation. The extra
1964 * precision is only needed for the first term of the
1965 * approximation, with subsequent terms being significantly
1966 * smaller; the approximation only uses odd exponents, and the
1967 * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
1968 */
1969 if (arg0_sign) {
1970 dexp = 0x3fff;
1971 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1972 sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
1973 } else {
1974 dexp = 0x4000;
1975 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1976 dsig0 |= 0x8000000000000000ULL;
1977 }
1978 texp = arg0_exp - dexp + 0x3ffe;
1979 rsig0 = arg0_sig;
1980 rsig1 = 0;
1981 rsig2 = 0;
1982 if (dsig0 <= rsig0) {
1983 shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
1984 ++texp;
1985 }
1986 tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
1987 mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
1988 sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
1989 &rsig0, &rsig1, &rsig2);
1990 while ((int64_t) rsig0 < 0) {
1991 --tsig0;
1992 add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
1993 &rsig0, &rsig1, &rsig2);
1994 }
1995 tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
1996 /*
1997 * No need to correct any estimation error in tsig1; even with
1998 * such error, it is accurate enough. Now compute the square of
1999 * that approximation.
2000 */
2001 mul128To256(tsig0, tsig1, tsig0, tsig1,
2002 &t2sig0, &t2sig1, &t2sig2, &t2sig3);
2003 t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
2004 texp + texp - 0x3ffe,
2005 t2sig0, t2sig1, &env->fp_status);
2006
2007 /* Compute the lower parts of the polynomial expansion. */
2008 accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
2009 accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
2010 accum = floatx80_mul(accum, t2, &env->fp_status);
2011 accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
2012 accum = floatx80_mul(accum, t2, &env->fp_status);
2013 accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
2014 accum = floatx80_mul(accum, t2, &env->fp_status);
2015 accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
2016 accum = floatx80_mul(accum, t2, &env->fp_status);
2017 accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
2018 accum = floatx80_mul(accum, t2, &env->fp_status);
2019 accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
2020 accum = floatx80_mul(accum, t2, &env->fp_status);
2021 accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
2022 accum = floatx80_mul(accum, t2, &env->fp_status);
2023 accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
2024 accum = floatx80_mul(accum, t2, &env->fp_status);
2025 accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
2026
2027 /*
2028 * The full polynomial expansion is fyl2x_coeff_0 + accum (where
2029 * accum has much lower magnitude, and so, in particular, carry
2030 * out of the addition is not possible), multiplied by t. (This
2031 * expansion is only accurate to about 70 bits, not 128 bits.)
2032 */
2033 aexp = extractFloatx80Exp(fyl2x_coeff_0);
2034 asign = extractFloatx80Sign(fyl2x_coeff_0);
2035 shift128RightJamming(extractFloatx80Frac(accum), 0,
2036 aexp - extractFloatx80Exp(accum),
2037 &asig0, &asig1);
2038 bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
2039 bsig1 = 0;
2040 if (asign == extractFloatx80Sign(accum)) {
2041 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
2042 } else {
2043 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
2044 }
2045 /* Multiply by t to compute the required result. */
2046 mul128To256(asig0, asig1, tsig0, tsig1,
2047 &asig0, &asig1, &asig2, &asig3);
2048 aexp += texp - 0x3ffe;
2049 *exp = aexp;
2050 *sig0 = asig0;
2051 *sig1 = asig1;
2052 }
2053
/*
 * FYL2XP1: ST1 = ST1 * log2(1 + ST0), then pop.  Accurate for ST0
 * close to zero, where computing 1+ST0 explicitly would lose
 * precision.  Special cases (NaNs, invalid encodings, zeros,
 * infinities, out-of-range ST0) are resolved first.
 */
void helper_fyl2xp1(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0, &env->fp_status) ||
               floatx80_invalid_encoding(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_exp > 0x3ffd ||
               (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
                                                  0x95f619980c4336f7ULL :
                                                  0xd413cccfe7799211ULL))) {
        /*
         * Out of range for the instruction (ST0 must have absolute
         * value less than 1 - sqrt(2)/2 = 0.292..., according to
         * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
         * to sqrt(2) - 1, which we allow here), treat as invalid.
         */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
               arg1_exp == 0x7fff) {
        /*
         * One argument is zero, or multiplying by infinity; correct
         * result is exact and can be obtained by multiplying the
         * arguments.
         */
        ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
    } else if (arg0_exp < 0x3fb0) {
        /*
         * Multiplying both arguments and an extra-precision version
         * of log2(e) is sufficiently precise.
         */
        uint64_t sig0, sig1, sig2;
        int32_t exp;
        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
                        &sig0, &sig1, &sig2);
        exp = arg0_exp + 1;
        mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
        exp += arg1_exp - 0x3ffe;
        /* This result is inexact. */
        sig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, exp,
                                            sig0, sig1, &env->fp_status);
    } else {
        /* General case: use the shared log2 polynomial evaluation. */
        int32_t aexp;
        uint64_t asig0, asig1, asig2;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
        /*
         * Multiply by the second argument to compute the required
         * result.
         */
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
        aexp += arg1_exp - 0x3ffe;
        /* This result is inexact. */
        asig1 |= 1;
        env->fp_status.float_rounding_mode = save_mode;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, aexp,
                                            asig0, asig1, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}
2151
/*
 * FYL2X: ST1 = ST1 * log2(ST0), then pop.  A long ladder of special
 * cases (NaNs, invalid encodings, negative/zero/infinite operands,
 * ST0 == 1) precedes the general path, which splits log2(ST0) into an
 * integer part (from the exponent) and a fractional part computed by
 * helper_fyl2x_common.
 */
void helper_fyl2x(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0, &env->fp_status) ||
               floatx80_invalid_encoding(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_sign && !floatx80_is_zero(ST0)) {
        /* log2 of a negative number is invalid. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_infinity(ST1, &env->fp_status)) {
        /* Sign of the infinite result depends on whether log2(ST0)
           is negative (ST0 < 1) or positive (ST0 > 1). */
        FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
                                             &env->fp_status);
        switch (cmp) {
        case float_relation_less:
            ST1 = floatx80_chs(ST1);
            break;
        case float_relation_greater:
            /* Result is infinity of the same sign as ST1. */
            break;
        default:
            /* ST0 == 1: inf * 0 is invalid. */
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
            break;
        }
    } else if (floatx80_is_infinity(ST0, &env->fp_status)) {
        if (floatx80_is_zero(ST1)) {
            /* 0 * inf is invalid. */
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else if (arg1_sign) {
            ST1 = floatx80_chs(ST0);
        } else {
            ST1 = ST0;
        }
    } else if (floatx80_is_zero(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else {
            /* Result is infinity with opposite sign to ST1. */
            float_raise(float_flag_divbyzero, &env->fp_status);
            ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
                                0x8000000000000000ULL);
        }
    } else if (floatx80_is_zero(ST1)) {
        /* Zero result; flip its sign when log2(ST0) is negative. */
        if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
            ST1 = floatx80_chs(ST1);
        }
        /* Otherwise, ST1 is already the correct result. */
    } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
        /* log2(1) == 0; preserve the sign convention of ST1 * 0. */
        if (arg1_sign) {
            ST1 = floatx80_chs(floatx80_zero);
        } else {
            ST1 = floatx80_zero;
        }
    } else {
        int32_t int_exp;
        floatx80 arg0_m1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        /* Choose int_exp so ST0/2^int_exp lands in [sqrt(2)/2, sqrt(2)). */
        int_exp = arg0_exp - 0x3fff;
        if (arg0_sig > 0xb504f333f9de6484ULL) {
            ++int_exp;
        }
        arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
                                               &env->fp_status),
                               floatx80_one, &env->fp_status);
        if (floatx80_is_zero(arg0_m1)) {
            /* Exact power of 2; multiply by ST1. */
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
                               ST1, &env->fp_status);
        } else {
            bool asign = extractFloatx80Sign(arg0_m1);
            int32_t aexp;
            uint64_t asig0, asig1, asig2;
            helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
            if (int_exp != 0) {
                /* Combine the integer part with the fractional part. */
                bool isign = (int_exp < 0);
                int32_t iexp;
                uint64_t isig;
                int shift;
                int_exp = isign ? -int_exp : int_exp;
                shift = clz32(int_exp) + 32;
                isig = int_exp;
                isig <<= shift;
                iexp = 0x403e - shift;
                shift128RightJamming(asig0, asig1, iexp - aexp,
                                     &asig0, &asig1);
                if (asign == isign) {
                    add128(isig, 0, asig0, asig1, &asig0, &asig1);
                } else {
                    sub128(isig, 0, asig0, asig1, &asig0, &asig1);
                }
                aexp = iexp;
                asign = isign;
            }
            /*
             * Multiply by the second argument to compute the required
             * result.
             */
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
            aexp += arg1_exp - 0x3ffe;
            /* This result is inexact. */
            asig1 |= 1;
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                asign ^ arg1_sign, aexp,
                                                asig0, asig1, &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}
2298
/* FSQRT: ST0 = sqrt(ST0); status-word condition bits adjusted first
   for negative inputs, then softfloat does the work and raises any
   exceptions. */
void helper_fsqrt(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);

    if (floatx80_is_neg(ST0)) {
        /* Clear C3,C2,C1,C0, then set bit 10. */
        env->fpus = (env->fpus & ~0x4700) | 0x400;
    }
    ST0 = floatx80_sqrt(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}
2309
/*
 * FSINCOS: push cos(ST0) after replacing ST0 with sin(ST0).  Computed
 * via the host libm in double precision, so results are less accurate
 * than real hardware.  Arguments with |x| >= 2^63 set C2 and leave the
 * stack unchanged (guest must reduce the argument).
 */
void helper_fsincos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;   /* C2 <-- 1: out of range */
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        fpush(env);
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}
2324
/* FRNDINT: round ST0 to an integer using the current rounding mode. */
void helper_frndint(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}
2331
/*
 * FSCALE: ST0 = ST0 * 2^trunc(ST1).  Special operands (invalid
 * encodings, NaNs, infinite scale factors) are resolved explicitly;
 * the common path truncates ST1 to an integer and scales with
 * floatx80_scalbn at full 80-bit precision.
 */
void helper_fscale(CPUX86State *env)
{
    int old_flags = save_exception_flags(env);
    if (floatx80_invalid_encoding(ST1, &env->fp_status) ||
        floatx80_invalid_encoding(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST1)) {
        /* A signaling NaN in either operand raises invalid; the
           (silenced) NaN from ST1 becomes the result. */
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
        }
        ST0 = ST1;
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (floatx80_is_infinity(ST1, &env->fp_status) &&
               !floatx80_invalid_encoding(ST0, &env->fp_status) &&
               !floatx80_is_any_nan(ST0)) {
        if (floatx80_is_neg(ST1)) {
            /* Scaling by -inf: finite values go to signed zero;
               inf * 2^-inf is invalid. */
            if (floatx80_is_infinity(ST0, &env->fp_status)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_zero) :
                       floatx80_zero);
            }
        } else {
            /* Scaling by +inf: nonzero values go to signed infinity;
               0 * 2^+inf is invalid. */
            if (floatx80_is_zero(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = floatx80_default_inf(floatx80_is_neg(ST0),
                                           &env->fp_status);
            }
        }
    } else {
        int n;
        FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision;
        int save_flags = get_float_exception_flags(&env->fp_status);
        /* Truncate ST1 without leaking conversion exceptions into the
           visible flag state. */
        set_float_exception_flags(0, &env->fp_status);
        n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
        set_float_exception_flags(save_flags, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save;
    }
    merge_exception_flags(env, old_flags);
}
2382
/* FSIN via the host libm (double precision, less accurate than real
   hardware).  Out-of-range arguments set C2 and leave ST0 untouched. */
void helper_fsin(CPUX86State *env)
{
    double val = floatx80_to_double(env, ST0);

    if (fabs(val) > MAXTAN) {
        env->fpus |= 0x400;     /* C2 <-- 1: argument out of range */
    } else {
        ST0 = double_to_floatx80(env, sin(val));
        env->fpus &= ~0x400;    /* C2 <-- 0 */
        /* the above code is for |arg| < 2**53 only */
    }
}
2395
/* FCOS via the host libm (double precision, less accurate than real
   hardware).  Out-of-range arguments set C2 and leave ST0 untouched. */
void helper_fcos(CPUX86State *env)
{
    double val = floatx80_to_double(env, ST0);

    if (fabs(val) > MAXTAN) {
        env->fpus |= 0x400;     /* C2 <-- 1: argument out of range */
    } else {
        ST0 = double_to_floatx80(env, cos(val));
        env->fpus &= ~0x400;    /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}
2408
/*
 * FXAM: classify ST0 and report the class in the C3/C2/C1/C0 condition
 * bits of the status word (C1 = sign, C3/C2/C0 = class).
 */
void helper_fxam_ST0(CPUX86State *env)
{
    CPU_LDoubleU temp;
    int expdif;

    temp.d = ST0;

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (SIGND(temp)) {
        env->fpus |= 0x200; /* C1 <-- 1 */
    }

    /* Empty register: classification is based on the tag word alone. */
    if (env->fptags[env->fpstt]) {
        env->fpus |= 0x4100; /* Empty */
        return;
    }

    expdif = EXPD(temp);
    if (expdif == MAXEXPD) {
        if (MANTD(temp) == 0x8000000000000000ULL) {
            env->fpus |= 0x500; /* Infinity */
        } else if (MANTD(temp) & 0x8000000000000000ULL) {
            env->fpus |= 0x100; /* NaN */
        }
        /* else: integer bit clear (unsupported encoding) — no class
           bits are set. */
    } else if (expdif == 0) {
        if (MANTD(temp) == 0) {
            env->fpus |= 0x4000; /* Zero */
        } else {
            env->fpus |= 0x4400; /* Denormal */
        }
    } else if (MANTD(temp) & 0x8000000000000000ULL) {
        env->fpus |= 0x400; /* Normal finite number */
    }
}
2443
/*
 * Store the x87 environment (FNSTENV layout) at @ptr: control word,
 * status word (with the current top-of-stack folded in), a tag word
 * recomputed from the register contents, and the last instruction/data
 * pointers.  @data32 selects the 32-bit (28-byte) vs 16-bit (14-byte)
 * format.
 */
static void do_fstenv(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    /* Merge the stack-top index into bits 13:11 of the status word. */
    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            /* Recompute the 2-bit tag from the register's contents. */
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       || (mant & (1LL << 63)) == 0) {
                /* NaNs, infinity, denormal */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        access_stl(ac, ptr, env->fpuc);
        access_stl(ac, ptr + 4, fpus);
        access_stl(ac, ptr + 8, fptag);
        access_stl(ac, ptr + 12, env->fpip); /* fpip */
        access_stl(ac, ptr + 16, env->fpcs); /* fpcs */
        access_stl(ac, ptr + 20, env->fpdp); /* fpdp */
        access_stl(ac, ptr + 24, env->fpds); /* fpds */
    } else {
        /* 16 bit */
        access_stw(ac, ptr, env->fpuc);
        access_stw(ac, ptr + 2, fpus);
        access_stw(ac, ptr + 4, fptag);
        access_stw(ac, ptr + 6, env->fpip);
        access_stw(ac, ptr + 8, env->fpcs);
        access_stw(ac, ptr + 10, env->fpdp);
        access_stw(ac, ptr + 12, env->fpds);
    }
}
2491
/* FNSTENV: probe the destination for writing, then store the
   environment (14 or 28 bytes depending on operand size). */
void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
    X86Access ac;

    access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC());
    do_fstenv(&ac, ptr, data32);
}
2499
/*
 * Install a new FPU status word: bits 13:11 become the top-of-stack
 * index, and the busy bit (B) is derived from the summary-exception
 * bit (SE) rather than taken from the stored value.
 */
static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
{
    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800 & ~FPUS_B;
    env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
#if !defined(CONFIG_USER_ONLY)
    if (!(env->fpus & FPUS_SE)) {
        /*
         * Here the processor deasserts FERR#; in response, the chipset deasserts
         * IGNNE#.
         */
        cpu_clear_ignne();
    }
#endif
}
2515
/*
 * Load the x87 environment from memory at @ptr (FLDENV layout):
 * control word, status word and tag word.  Field stride is 4 bytes in
 * the 32-bit format, 2 bytes in the 16-bit one, hence the
 * (n << data32) offsets.  Tags are reduced to QEMU's boolean
 * "empty/not empty" representation.
 */
static void do_fldenv(X86Access *ac, target_ulong ptr, int data32)
{
    int i, fpus, fptag;
    CPUX86State *env = ac->env;

    cpu_set_fpuc(env, access_ldw(ac, ptr));
    fpus = access_ldw(ac, ptr + (2 << data32));
    fptag = access_ldw(ac, ptr + (4 << data32));

    cpu_set_fpus(env, fpus);
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag & 3) == 3);   /* 3 == empty */
        fptag >>= 2;
    }
}
2531
/*
 * FLDENV: load the x87 environment (control/status/tag words) from
 * memory at @ptr.
 *
 * FLDENV only reads guest memory, so probe the region with
 * MMU_DATA_LOAD — matching do_frstor()/helper_frstor(), which load the
 * same layout.  Probing with MMU_DATA_STORE would wrongly fault when
 * the environment is loaded from a read-only mapping.
 */
void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
{
    X86Access ac;

    access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_LOAD, GETPC());
    do_fldenv(&ac, ptr, data32);
}
2539
/*
 * FNSAVE body: store the environment, then the eight 80-bit stack
 * registers (10 bytes each, in stack order), and finally reinitialize
 * the FPU as the instruction requires.
 */
static void do_fsave(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;

    do_fstenv(ac, ptr, data32);
    ptr += 14 << data32;   /* skip past the environment block */

    for (int i = 0; i < 8; i++) {
        floatx80 tmp = ST(i);
        do_fstt(ac, ptr, tmp);
        ptr += 10;
    }

    /* FNSAVE resets the FPU after saving state. */
    do_fninit(env);
}
2555
/* FNSAVE: probe the full area (environment + 8 x 10-byte registers)
   for writing, then save. */
void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    int size = (14 << data32) + 80;
    X86Access ac;

    access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, GETPC());
    do_fsave(&ac, ptr, data32);
}
2564
/*
 * FRSTOR body: load the environment, then the eight 80-bit stack
 * registers (10 bytes each, in stack order).
 */
static void do_frstor(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;

    do_fldenv(ac, ptr, data32);
    ptr += 14 << data32;   /* skip past the environment block */

    for (int i = 0; i < 8; i++) {
        floatx80 tmp = do_fldt(ac, ptr);
        ST(i) = tmp;
        ptr += 10;
    }
}
2578
/* FRSTOR: probe the full area (environment + 8 x 10-byte registers)
   for reading, then restore. */
void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    int size = (14 << data32) + 80;
    X86Access ac;

    access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, GETPC());
    do_frstor(&ac, ptr, data32);
}
2587
/* Shorthand for the offset of field X within X86XSaveArea. */
#define XO(X) offsetof(X86XSaveArea, X)
2589
/*
 * Save the legacy x87 state into the FXSAVE/XSAVE area at @ptr:
 * control/status words, an abridged 8-bit tag word, zeroed
 * instruction/data pointers, and the eight stack registers at
 * 16-byte stride.
 */
static void do_xsave_fpu(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int fpus, fptag, i;
    target_ulong addr;

    /* Fold the top-of-stack index into bits 13:11 of the status word. */
    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 0; i < 8; i++) {
        fptag |= (env->fptags[i] << i);
    }

    access_stw(ac, ptr + XO(legacy.fcw), env->fpuc);
    access_stw(ac, ptr + XO(legacy.fsw), fpus);
    /* Abridged tag: 1 bit per register, 1 == valid, hence the invert. */
    access_stw(ac, ptr + XO(legacy.ftw), fptag ^ 0xff);

    /* In 32-bit mode this is eip, sel, dp, sel.
       In 64-bit mode this is rip, rdp.
       But in either case we don't write actual data, just zeros. */
    access_stq(ac, ptr + XO(legacy.fpip), 0); /* eip+sel; rip */
    access_stq(ac, ptr + XO(legacy.fpdp), 0); /* edp+sel; rdp */

    addr = ptr + XO(legacy.fpregs);

    for (i = 0; i < 8; i++) {
        floatx80 tmp = ST(i);
        do_fstt(ac, addr, tmp);
        addr += 16;   /* 10 bytes of data, 6 bytes of padding */
    }
}
2620
/* Save MXCSR and its mask, first syncing the shadow SSE status flags
   into env->mxcsr. */
static void do_xsave_mxcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    update_mxcsr_from_sse_status(env);
    access_stl(ac, ptr + XO(legacy.mxcsr), env->mxcsr);
    access_stl(ac, ptr + XO(legacy.mxcsr_mask), 0x0000ffff);
}
2629
/* Save the low 128 bits of each XMM register into the legacy XSAVE
   area.  64-bit code sees XMM0-XMM15; 32-bit code only XMM0-XMM7. */
static void do_xsave_sse(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int nb_xmm_regs = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    target_ulong addr = ptr + XO(legacy.xmm_regs);

    for (int i = 0; i < nb_xmm_regs; i++, addr += 16) {
        access_stq(ac, addr, env->xmm_regs[i].ZMM_Q(0));
        access_stq(ac, addr + 8, env->xmm_regs[i].ZMM_Q(1));
    }
}
2649
/* Save the high 128 bits (YMM upper halves) of each vector register
   into the AVX component at @ptr, 16 bytes per register. */
static void do_xsave_ymmh(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int nb_xmm_regs = (env->hflags & HF_CS64_MASK) ? 16 : 8;

    for (int i = 0; i < nb_xmm_regs; i++, ptr += 16) {
        access_stq(ac, ptr, env->xmm_regs[i].ZMM_Q(2));
        access_stq(ac, ptr + 8, env->xmm_regs[i].ZMM_Q(3));
    }
}
2666
/* Save the four MPX bound registers (lower/upper bound pairs) into
   the BNDREGS component at @ptr. */
static void do_xsave_bndregs(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);

    for (int i = 0; i < 4; i++) {
        access_stq(ac, addr, env->bnd_regs[i].lb);
        access_stq(ac, addr + 8, env->bnd_regs[i].ub);
        addr += 16;
    }
}
2678
/* Save the MPX configuration (BNDCFGU) and status registers into the
   BNDCSR component at @ptr. */
static void do_xsave_bndcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
               env->bndcs_regs.cfgu);
    access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
               env->bndcs_regs.sts);
}
2688
/* Save the PKRU register into the PKRU component at @ptr. */
static void do_xsave_pkru(X86Access *ac, target_ulong ptr)
{
    access_stq(ac, ptr, ac->env->pkru);
}
2693
/*
 * FXSAVE body: always save x87 state; save MXCSR and (unless the
 * fast-FXSAVE optimization applies) the XMM registers only when
 * CR4.OSFXSR is set.
 */
static void do_fxsave(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    do_xsave_fpu(ac, ptr);
    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xsave_mxcsr(ac, ptr);
        /* Fast FXSAVE leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xsave_sse(ac, ptr);
        }
    }
}
2709
/* FXSAVE: check alignment (#GP on misaligned operand), probe the
   512-byte legacy area for writing, then save. */
void helper_fxsave(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();
    X86Access ac;

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
                   MMU_DATA_STORE, ra);
    do_fxsave(&ac, ptr);
}
2724
/*
 * Return the XINUSE bitmap.  QEMU does not track in-use state for most
 * components, so report them all as in use; BNDREGS is the exception,
 * since its state is already tracked in hflags.
 */
static uint64_t get_xinuse(CPUX86State *env)
{
    uint64_t inuse = ~(uint64_t)0;

    if (!(env->hflags & HF_MPX_IU_MASK)) {
        inuse &= ~XSTATE_BNDREGS_MASK;
    }
    return inuse;
}
2738
/*
 * Store the XSAVE components selected by OPT at PTR and update the
 * image's XSTATE_BV header field.
 *
 * rfbm:  requested-feature bitmap, already masked by XCR0.
 * inuse: components that currently hold non-initial state.
 * opt:   components actually written (== rfbm for XSAVE,
 *        rfbm & inuse for XSAVEOPT).
 */
static void do_xsave_access(X86Access *ac, target_ulong ptr, uint64_t rfbm,
                            uint64_t inuse, uint64_t opt)
{
    uint64_t old_bv, new_bv;

    if (opt & XSTATE_FP_MASK) {
        do_xsave_fpu(ac, ptr);
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
        do_xsave_mxcsr(ac, ptr);
    }
    if (opt & XSTATE_SSE_MASK) {
        do_xsave_sse(ac, ptr);
    }
    if (opt & XSTATE_YMM_MASK) {
        do_xsave_ymmh(ac, ptr + XO(avx_state));
    }
    if (opt & XSTATE_BNDREGS_MASK) {
        do_xsave_bndregs(ac, ptr + XO(bndreg_state));
    }
    if (opt & XSTATE_BNDCSR_MASK) {
        do_xsave_bndcsr(ac, ptr + XO(bndcsr_state));
    }
    if (opt & XSTATE_PKRU_MASK) {
        do_xsave_pkru(ac, ptr + XO(pkru_state));
    }

    /* Update the XSTATE_BV field: bits within RFBM reflect INUSE;
       bits outside RFBM keep their previous in-memory value.  */
    old_bv = access_ldq(ac, ptr + XO(header.xstate_bv));
    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
    access_stq(ac, ptr + XO(header.xstate_bv), new_bv);
}
2772
/* Common XSAVE/XRSTOR argument checks: #UD when CR4.OSXSAVE is clear,
   #GP when the operand is not 64-byte aligned. */
static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }
    if (ptr & 0x3f) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }
}
2785
/*
 * Common implementation of XSAVE and XSAVEOPT: validate the pointer,
 * mask the requested features by XCR0, size and probe the save area,
 * then store the selected components.
 */
static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
                     uint64_t inuse, uint64_t opt, uintptr_t ra)
{
    X86Access ac;
    unsigned size;

    do_xsave_chk(env, ptr, ra);

    /* Never save anything not enabled by XCR0.  */
    rfbm &= env->xcr0;
    opt &= rfbm;
    size = xsave_area_size(opt, false);

    access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra);
    do_xsave_access(&ac, ptr, rfbm, inuse, opt);
}
2802
/* XSAVE helper: write every requested component (opt == rfbm). */
void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uint64_t inuse = get_xinuse(env);

    do_xsave(env, ptr, rfbm, inuse, rfbm, GETPC());
}
2807
/* XSAVEOPT helper: skip components not in use (opt == inuse). */
void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uint64_t inuse = get_xinuse(env);

    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
}
2813
/* Reload x87 control, status, tag words and the eight 80-bit data
   registers from the legacy XSAVE area at PTR. */
static void do_xrstor_fpu(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong addr;
    int reg, tags;

    cpu_set_fpuc(env, access_ldw(ac, ptr + XO(legacy.fcw)));
    cpu_set_fpus(env, access_ldw(ac, ptr + XO(legacy.fsw)));

    /* The saved abridged FTW has a 1 bit per valid register; invert
       to recover the internal "tag empty" convention. */
    tags = access_ldw(ac, ptr + XO(legacy.ftw)) ^ 0xff;

    addr = ptr + XO(legacy.fpregs);
    for (reg = 0; reg < 8; reg++, addr += 16) {
        env->fptags[reg] = (tags >> reg) & 1;
        ST(reg) = do_fldt(ac, addr);
    }
}
2839
/* Reload MXCSR from the legacy XSAVE area at PTR. */
static void do_xrstor_mxcsr(X86Access *ac, target_ulong ptr)
{
    uint32_t mxcsr = access_ldl(ac, ptr + XO(legacy.mxcsr));

    cpu_set_mxcsr(ac->env, mxcsr);
}
2845
/* Reload the low 128 bits of the XMM registers from PTR.  Only the
   first 8 registers are architecturally visible outside 64-bit code. */
static void do_xrstor_sse(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int nregs = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    target_ulong addr = ptr + XO(legacy.xmm_regs);
    int i;

    for (i = 0; i < nregs; i++, addr += 16) {
        env->xmm_regs[i].ZMM_Q(0) = access_ldq(ac, addr);
        env->xmm_regs[i].ZMM_Q(1) = access_ldq(ac, addr + 8);
    }
}
2865
/* Reset the low 128 bits of the visible XMM registers to the
   initial (zero) state. */
static void do_clear_sse(CPUX86State *env)
{
    int nregs = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    int i;

    for (i = 0; i < nregs; i++) {
        env->xmm_regs[i].ZMM_Q(0) = 0;
        env->xmm_regs[i].ZMM_Q(1) = 0;
    }
}
2881
/* Reload the high 128 bits of the visible YMM registers from PTR. */
static void do_xrstor_ymmh(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int nregs = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    int i;

    for (i = 0; i < nregs; i++, ptr += 16) {
        env->xmm_regs[i].ZMM_Q(2) = access_ldq(ac, ptr);
        env->xmm_regs[i].ZMM_Q(3) = access_ldq(ac, ptr + 8);
    }
}
2898
/* Reset the high 128 bits of the visible YMM registers to the
   initial (zero) state. */
static void do_clear_ymmh(CPUX86State *env)
{
    int nregs = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    int i;

    for (i = 0; i < nregs; i++) {
        env->xmm_regs[i].ZMM_Q(2) = 0;
        env->xmm_regs[i].ZMM_Q(3) = 0;
    }
}
2914
/* Reload the four MPX bound registers (lower/upper pairs) from PTR. */
static void do_xrstor_bndregs(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong base = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int n;

    for (n = 0; n < 4; n++) {
        env->bnd_regs[n].lb = access_ldq(ac, base + n * 16);
        env->bnd_regs[n].ub = access_ldq(ac, base + n * 16 + 8);
    }
}
2926
/* Reload the MPX bound configuration and status registers from PTR. */
static void do_xrstor_bndcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong cfgu_addr = ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu);
    target_ulong sts_addr = ptr + offsetof(XSaveBNDCSR, bndcsr.sts);

    /* FIXME: Extend highest implemented bit of linear address.  */
    env->bndcs_regs.cfgu = access_ldq(ac, cfgu_addr);
    env->bndcs_regs.sts = access_ldq(ac, sts_addr);
}
2937
/* Reload the PKRU register from PTR. */
static void do_xrstor_pkru(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    env->pkru = access_ldq(ac, ptr);
}
2942
/* FXRSTOR: reload the legacy FP/SSE area.  MXCSR and the XMM
   registers are only loaded when CR4.OSFXSR is set. */
static void do_fxrstor(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    bool skip_xmm;

    do_xrstor_fpu(ac, ptr);

    if (!(env->cr[4] & CR4_OSFXSR_MASK)) {
        return;
    }
    do_xrstor_mxcsr(ac, ptr);

    /* Fast FXRSTOR (EFER.FFXSR at CPL0 in long mode) leaves out
       the XMM registers. */
    skip_xmm = (env->efer & MSR_EFER_FFXSR)
        && !(env->hflags & HF_CPL_MASK)
        && (env->hflags & HF_LMA_MASK);
    if (!skip_xmm) {
        do_xrstor_sse(ac, ptr);
    }
}
2958
/* FXRSTOR instruction helper: check alignment, probe the 512-byte
   legacy area for reading, then reload it. */
void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    uintptr_t retaddr = GETPC();
    X86Access ac;

    /* The operand must be 16-byte aligned, else #GP. */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, retaddr);
    }

    access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
                   MMU_DATA_LOAD, retaddr);
    do_fxrstor(&ac, ptr);
}
2973
valid_xrstor_header(X86Access * ac,uint64_t * pxsbv,target_ulong ptr)2974 static bool valid_xrstor_header(X86Access *ac, uint64_t *pxsbv,
2975 target_ulong ptr)
2976 {
2977 uint64_t xstate_bv, xcomp_bv, reserve0;
2978
2979 xstate_bv = access_ldq(ac, ptr + XO(header.xstate_bv));
2980 xcomp_bv = access_ldq(ac, ptr + XO(header.xcomp_bv));
2981 reserve0 = access_ldq(ac, ptr + XO(header.reserve0));
2982 *pxsbv = xstate_bv;
2983
2984 /*
2985 * XCOMP_BV bit 63 indicates compact form, which we do not support,
2986 * and thus must raise #GP. That leaves us in standard form.
2987 * In standard form, bytes 23:8 must be zero -- which is both
2988 * XCOMP_BV and the following 64-bit field.
2989 */
2990 if (xcomp_bv || reserve0) {
2991 return false;
2992 }
2993
2994 /* The XSTATE_BV field must not set bits not present in XCR0. */
2995 return (xstate_bv & ~ac->env->xcr0) == 0;
2996 }
2997
/*
 * Restore the XSAVE components selected by RFBM from memory at PTR.
 * For each selected component, the image's XSTATE_BV decides between
 * loading the saved data and resetting the component to its initial
 * state.
 */
static void do_xrstor(X86Access *ac, target_ulong ptr,
                      uint64_t rfbm, uint64_t xstate_bv)
{
    CPUX86State *env = ac->env;

    if (rfbm & XSTATE_FP_MASK) {
        if (xstate_bv & XSTATE_FP_MASK) {
            do_xrstor_fpu(ac, ptr);
        } else {
            /* Initial x87 state: FNINIT plus zeroed data registers. */
            do_fninit(env);
            memset(env->fpregs, 0, sizeof(env->fpregs));
        }
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that the standard form of XRSTOR loads MXCSR from memory
           whether or not the XSTATE_BV bit is set. */
        do_xrstor_mxcsr(ac, ptr);
        if (xstate_bv & XSTATE_SSE_MASK) {
            do_xrstor_sse(ac, ptr);
        } else {
            do_clear_sse(env);
        }
    }
    if (rfbm & XSTATE_YMM_MASK) {
        if (xstate_bv & XSTATE_YMM_MASK) {
            do_xrstor_ymmh(ac, ptr + XO(avx_state));
        } else {
            do_clear_ymmh(env);
        }
    }
    if (rfbm & XSTATE_BNDREGS_MASK) {
        /* BNDREGS in-use state is tracked in HFLAGS (see get_xinuse). */
        if (xstate_bv & XSTATE_BNDREGS_MASK) {
            do_xrstor_bndregs(ac, ptr + XO(bndreg_state));
            env->hflags |= HF_MPX_IU_MASK;
        } else {
            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
            env->hflags &= ~HF_MPX_IU_MASK;
        }
    }
    if (rfbm & XSTATE_BNDCSR_MASK) {
        if (xstate_bv & XSTATE_BNDCSR_MASK) {
            do_xrstor_bndcsr(ac, ptr + XO(bndcsr_state));
        } else {
            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
        }
        cpu_sync_bndcs_hflags(env);
    }
    if (rfbm & XSTATE_PKRU_MASK) {
        uint64_t old_pkru = env->pkru;
        if (xstate_bv & XSTATE_PKRU_MASK) {
            do_xrstor_pkru(ac, ptr + XO(pkru_state));
        } else {
            env->pkru = 0;
        }
        if (env->pkru != old_pkru) {
            /* PKRU affects page-access permissions; flush the TLB. */
            CPUState *cs = env_cpu(env);
            tlb_flush(cs);
        }
    }
}
3058
3059 #undef XO
3060
/*
 * XRSTOR instruction helper: validate alignment and the XSAVE header,
 * probe the (possibly extended) area for reading, then restore the
 * components selected by RFBM.
 */
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uintptr_t ra = GETPC();
    X86Access ac;
    uint64_t xstate_bv;
    unsigned size, size_ext;

    do_xsave_chk(env, ptr, ra);

    /* Begin with just the minimum size to validate the header. */
    size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader);
    access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra);
    if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Re-probe if components beyond the legacy area will be loaded. */
    rfbm &= env->xcr0;
    size_ext = xsave_area_size(rfbm & xstate_bv, false);
    if (size < size_ext) {
        /* TODO: See if existing page probe has covered extra size. */
        access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra);
    }

    do_xrstor(&ac, ptr, rfbm, xstate_bv);
}
3086
3087 #if defined(CONFIG_USER_ONLY)
/* User-only: FSAVE the x87 state into HOST.
   Size: 7 32-bit control words plus 8 ten-byte ST registers. */
void cpu_x86_fsave(CPUX86State *env, void *host, size_t len)
{
    X86Access ac = { .env = env, .haddr1 = host, .size = 4 * 7 + 8 * 10 };

    assert(ac.size <= len);
    do_fsave(&ac, 0, true);
}
3099
/* User-only: FRSTOR the x87 state from HOST.
   Size: 7 32-bit control words plus 8 ten-byte ST registers. */
void cpu_x86_frstor(CPUX86State *env, void *host, size_t len)
{
    X86Access ac = { .env = env, .haddr1 = host, .size = 4 * 7 + 8 * 10 };

    assert(ac.size <= len);
    do_frstor(&ac, 0, true);
}
3111
/* User-only: FXSAVE the legacy FP/SSE state into HOST. */
void cpu_x86_fxsave(CPUX86State *env, void *host, size_t len)
{
    X86Access ac = {
        .env = env,
        .haddr1 = host,
        .size = sizeof(X86LegacyXSaveArea),
    };

    assert(ac.size <= len);
    do_fxsave(&ac, 0);
}
3123
/* User-only: FXRSTOR the legacy FP/SSE state from HOST. */
void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len)
{
    X86Access ac = {
        .env = env,
        .haddr1 = host,
        .size = sizeof(X86LegacyXSaveArea),
    };

    assert(ac.size <= len);
    do_fxrstor(&ac, 0);
}
3135
/* User-only: XSAVE the components in RFBM into HOST. */
void cpu_x86_xsave(CPUX86State *env, void *host, size_t len, uint64_t rfbm)
{
    X86Access ac = { .env = env, .haddr1 = host };

    /*
     * Since this is only called from user-level signal handling,
     * we should have done the job correctly there.
     */
    assert((rfbm & ~env->xcr0) == 0);
    ac.size = xsave_area_size(rfbm, false);
    assert(ac.size <= len);

    do_xsave_access(&ac, 0, rfbm, get_xinuse(env), rfbm);
}
3152
cpu_x86_xrstor(CPUX86State * env,void * host,size_t len,uint64_t rfbm)3153 bool cpu_x86_xrstor(CPUX86State *env, void *host, size_t len, uint64_t rfbm)
3154 {
3155 X86Access ac = {
3156 .haddr1 = host,
3157 .env = env,
3158 };
3159 uint64_t xstate_bv;
3160
3161 /*
3162 * Since this is only called from user-level signal handling,
3163 * we should have done the job correctly there.
3164 */
3165 assert((rfbm & ~env->xcr0) == 0);
3166 ac.size = xsave_area_size(rfbm, false);
3167 assert(ac.size <= len);
3168
3169 if (!valid_xrstor_header(&ac, &xstate_bv, 0)) {
3170 return false;
3171 }
3172 do_xrstor(&ac, 0, rfbm, xstate_bv);
3173 return true;
3174 }
3175 #endif
3176
helper_xgetbv(CPUX86State * env,uint32_t ecx)3177 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
3178 {
3179 /* The OS must have enabled XSAVE. */
3180 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
3181 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
3182 }
3183
3184 switch (ecx) {
3185 case 0:
3186 return env->xcr0;
3187 case 1:
3188 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
3189 return env->xcr0 & get_xinuse(env);
3190 }
3191 break;
3192 }
3193 raise_exception_ra(env, EXCP0D_GPF, GETPC());
3194 }
3195
/*
 * XSETBV: write MASK (EDX:EAX) to extended control register ECX.
 * Only XCR0 (ECX=0) exists; an invalid ECX or unsupported feature
 * combination raises #GP, a clear CR4.OSXSAVE raises #UD.
 */
void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
{
    uint32_t dummy, ena_lo, ena_hi;
    uint64_t ena;

    /* The OS must have enabled XSAVE. */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    /* Only XCR0 is defined at present; the FPU may not be disabled. */
    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
        goto do_gpf;
    }

    /* SSE can be disabled, but only if AVX is disabled too. */
    if ((mask & (XSTATE_SSE_MASK | XSTATE_YMM_MASK)) == XSTATE_YMM_MASK) {
        goto do_gpf;
    }

    /* Disallow enabling unimplemented features: the supported mask is
       reported in CPUID leaf 0DH, sub-leaf 0, EDX:EAX. */
    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
    ena = ((uint64_t)ena_hi << 32) | ena_lo;
    if (mask & ~ena) {
        goto do_gpf;
    }

    /* Disallow enabling only half of MPX.  The multiplication shifts
       the BNDREGS bit into the BNDCSR position, so the XOR has the
       BNDCSR bit set iff exactly one of the two MPX bits is set. */
    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
        & XSTATE_BNDCSR_MASK) {
        goto do_gpf;
    }

    env->xcr0 = mask;
    cpu_sync_bndcs_hflags(env);
    cpu_sync_avx_hflag(env);
    return;

 do_gpf:
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}
3237
3238 /* MMX/SSE */
3239 /* XXX: optimize by storing fptt and fptags in the static cpu state */
3240
3241 #define SSE_DAZ 0x0040
3242 #define SSE_RC_SHIFT 13
3243 #define SSE_RC_MASK (3 << SSE_RC_SHIFT)
3244 #define SSE_FZ 0x8000
3245
/* Propagate MXCSR into the softfloat SSE status: rounding mode,
   sticky exception flags, DAZ and FTZ. */
void update_mxcsr_status(CPUX86State *env)
{
    uint32_t mxcsr = env->mxcsr;
    int flags = 0;

    /* Rounding control lives in MXCSR bits 14:13. */
    set_x86_rounding_mode((mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT,
                          &env->sse_status);

    /* MXCSR's low six exception bits share the FPUS_* bit layout. */
    if (mxcsr & FPUS_IE) {
        flags |= float_flag_invalid;
    }
    if (mxcsr & FPUS_DE) {
        flags |= float_flag_input_denormal_used;
    }
    if (mxcsr & FPUS_ZE) {
        flags |= float_flag_divbyzero;
    }
    if (mxcsr & FPUS_OE) {
        flags |= float_flag_overflow;
    }
    if (mxcsr & FPUS_UE) {
        flags |= float_flag_underflow;
    }
    if (mxcsr & FPUS_PE) {
        flags |= float_flag_inexact;
    }
    set_float_exception_flags(flags, &env->sse_status);

    /* Denormals-are-zero affects inputs; flush-to-zero affects outputs. */
    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) != 0, &env->sse_status);
    set_flush_to_zero((mxcsr & SSE_FZ) != 0, &env->sse_status);
}
3270
/* Fold the accumulated softfloat exception flags back into the MXCSR
   sticky status bits (OR-only; MXCSR bits are never cleared here). */
void update_mxcsr_from_sse_status(CPUX86State *env)
{
    int flags = get_float_exception_flags(&env->sse_status);
    uint32_t bits = 0;

    if (flags & float_flag_invalid) {
        bits |= FPUS_IE;
    }
    if (flags & float_flag_input_denormal_used) {
        bits |= FPUS_DE;
    }
    if (flags & float_flag_divbyzero) {
        bits |= FPUS_ZE;
    }
    if (flags & float_flag_overflow) {
        bits |= FPUS_OE;
    }
    if (flags & float_flag_underflow) {
        bits |= FPUS_UE;
    }
    if (flags & float_flag_inexact) {
        bits |= FPUS_PE;
    }
    /* A flushed denormal output reports both underflow and precision. */
    if (flags & float_flag_output_denormal_flushed) {
        bits |= FPUS_UE | FPUS_PE;
    }
    env->mxcsr |= bits;
}
3283
/* TCG helper wrapper: fold softfloat exception flags back into MXCSR. */
void helper_update_mxcsr(CPUX86State *env)
{
    update_mxcsr_from_sse_status(env);
}
3288
/* LDMXCSR: install VAL as the new MXCSR via cpu_set_mxcsr(). */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}
3293
/*
 * Transition to MMX mode: reset the FP stack top and mark all eight
 * FP/MMX registers valid (tag byte 0).
 *
 * The original code zeroed fptags with two type-punned 32-bit stores
 * (*(uint32_t *)env->fptags); use memset over the 8-byte array instead,
 * which writes the same bytes without the strict-aliasing/alignment
 * hazard of accessing a byte array through uint32_t *.
 */
void helper_enter_mmx(CPUX86State *env)
{
    env->fpstt = 0;
    memset(env->fptags, 0, sizeof(env->fptags));
}
3300
/*
 * EMMS: mark all eight FP/MMX registers empty (tag byte 1).
 *
 * The original code filled fptags with two type-punned 32-bit stores
 * of 0x01010101; memset with fill byte 1 over the 8-byte array writes
 * the same bytes without the strict-aliasing/alignment hazard of
 * accessing a byte array through uint32_t *.
 */
void helper_emms(CPUX86State *env)
{
    /* set to empty state */
    memset(env->fptags, 1, sizeof(env->fptags));
}
3307
3308 #define SHIFT 0
3309 #include "ops_sse.h"
3310
3311 #define SHIFT 1
3312 #include "ops_sse.h"
3313
3314 #define SHIFT 2
3315 #include "ops_sse.h"
3316