1 /*
2 * Test some fused multiply add corner cases.
3 *
4 * SPDX-License-Identifier: GPL-2.0-or-later
5 */
6 #include <stdio.h>
7 #include <stdint.h>
8 #include <stdbool.h>
9 #include <inttypes.h>
10
11 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
12
13 /*
14 * Perform one "n * m + a" operation using the vfmadd insn and return
15 * the result; on return *mxcsr_p is set to the bottom 6 bits of MXCSR
16 * (the Flag bits). If ftz is true then we set MXCSR.FTZ while doing
17 * the operation.
18 * We print the operation and its results to stdout.
19 */
do_fmadd(uint64_t n,uint64_t m,uint64_t a,bool ftz,uint32_t * mxcsr_p)20 static uint64_t do_fmadd(uint64_t n, uint64_t m, uint64_t a,
21 bool ftz, uint32_t *mxcsr_p)
22 {
23 uint64_t r;
24 uint32_t mxcsr = 0;
25 uint32_t ftz_bit = ftz ? (1 << 15) : 0;
26 uint32_t saved_mxcsr = 0;
27
28 asm volatile("stmxcsr %[saved_mxcsr]\n"
29 "stmxcsr %[mxcsr]\n"
30 "andl $0xffff7fc0, %[mxcsr]\n"
31 "orl %[ftz_bit], %[mxcsr]\n"
32 "ldmxcsr %[mxcsr]\n"
33 "movq %[a], %%xmm0\n"
34 "movq %[m], %%xmm1\n"
35 "movq %[n], %%xmm2\n"
36 /* xmm0 = xmm0 + xmm2 * xmm1 */
37 "vfmadd231sd %%xmm1, %%xmm2, %%xmm0\n"
38 "movq %%xmm0, %[r]\n"
39 "stmxcsr %[mxcsr]\n"
40 "ldmxcsr %[saved_mxcsr]\n"
41 : [r] "=r" (r), [mxcsr] "=m" (mxcsr),
42 [saved_mxcsr] "=m" (saved_mxcsr)
43 : [n] "r" (n), [m] "r" (m), [a] "r" (a),
44 [ftz_bit] "r" (ftz_bit)
45 : "xmm0", "xmm1", "xmm2");
46 *mxcsr_p = mxcsr & 0x3f;
47 printf("vfmadd132sd 0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64
48 " = 0x%" PRIx64 " MXCSR flags 0x%" PRIx32 "\n",
49 n, m, a, r, *mxcsr_p);
50 return r;
51 }
52
53 typedef struct testdata {
54 /* Input n, m, a */
55 uint64_t n;
56 uint64_t m;
57 uint64_t a;
58 bool ftz;
59 /* Expected result */
60 uint64_t expected_r;
61 /* Expected low 6 bits of MXCSR (the Flag bits) */
62 uint32_t expected_mxcsr;
63 } testdata;
64
65 static testdata tests[] = {
66 { 0, 0x7ff0000000000000, 0x7ff000000000aaaa, false, /* 0 * Inf + SNaN */
67 0x7ff800000000aaaa, 1 }, /* Should be QNaN and does raise Invalid */
68 { 0, 0x7ff0000000000000, 0x7ff800000000aaaa, false, /* 0 * Inf + QNaN */
69 0x7ff800000000aaaa, 0 }, /* Should be QNaN and does *not* raise Invalid */
70 /*
71 * These inputs give a result which is tiny before rounding but which
72 * becomes non-tiny after rounding. x86 is a "detect tininess after
73 * rounding" architecture, so it should give a non-denormal result and
74 * not set the Underflow flag (only the Precision flag for an inexact
75 * result).
76 */
77 { 0x3fdfffffffffffff, 0x001fffffffffffff, 0x801fffffffffffff, false,
78 0x8010000000000000, 0x20 },
79 /*
80 * Flushing of denormal outputs to zero should also happen after
81 * rounding, so setting FTZ should not affect the result or the flags.
82 */
83 { 0x3fdfffffffffffff, 0x001fffffffffffff, 0x801fffffffffffff, true,
84 0x8010000000000000, 0x20 }, /* Enabling FTZ shouldn't change flags */
85 /*
86 * normal * 0 + a denormal. With FTZ disabled this gives an exact
87 * result (equal to the input denormal) that has consumed the denormal.
88 */
89 { 0x3cc8000000000000, 0x0000000000000000, 0x8008000000000000, false,
90 0x8008000000000000, 0x2 }, /* Denormal */
91 /*
92 * With FTZ enabled, this consumes the denormal, returns zero (because
93 * flushed) and indicates also Underflow and Precision.
94 */
95 { 0x3cc8000000000000, 0x0000000000000000, 0x8008000000000000, true,
96 0x8000000000000000, 0x32 }, /* Precision, Underflow, Denormal */
97 };
98
main(void)99 int main(void)
100 {
101 bool passed = true;
102 for (int i = 0; i < ARRAY_SIZE(tests); i++) {
103 uint32_t mxcsr;
104 uint64_t r = do_fmadd(tests[i].n, tests[i].m, tests[i].a,
105 tests[i].ftz, &mxcsr);
106 if (r != tests[i].expected_r) {
107 printf("expected result 0x%" PRIx64 "\n", tests[i].expected_r);
108 passed = false;
109 }
110 if (mxcsr != tests[i].expected_mxcsr) {
111 printf("expected MXCSR flags 0x%x\n", tests[i].expected_mxcsr);
112 passed = false;
113 }
114 }
115 return passed ? 0 : 1;
116 }
117