1*6dc29354SIlya Leoshkevich /* 2*6dc29354SIlya Leoshkevich * Test floating-point multiply-and-add instructions. 3*6dc29354SIlya Leoshkevich * 4*6dc29354SIlya Leoshkevich * SPDX-License-Identifier: GPL-2.0-or-later 5*6dc29354SIlya Leoshkevich */ 6*6dc29354SIlya Leoshkevich #include <fenv.h> 7*6dc29354SIlya Leoshkevich #include <stdbool.h> 8*6dc29354SIlya Leoshkevich #include <stdio.h> 9*6dc29354SIlya Leoshkevich #include <stdlib.h> 10*6dc29354SIlya Leoshkevich #include <string.h> 11*6dc29354SIlya Leoshkevich #include "float.h" 12*6dc29354SIlya Leoshkevich 13*6dc29354SIlya Leoshkevich union val { 14*6dc29354SIlya Leoshkevich float e; 15*6dc29354SIlya Leoshkevich double d; 16*6dc29354SIlya Leoshkevich long double x; 17*6dc29354SIlya Leoshkevich char buf[16]; 18*6dc29354SIlya Leoshkevich }; 19*6dc29354SIlya Leoshkevich 20*6dc29354SIlya Leoshkevich /* 21*6dc29354SIlya Leoshkevich * PoP tables as close to the original as possible. 22*6dc29354SIlya Leoshkevich */ 23*6dc29354SIlya Leoshkevich static const char *table1[N_SIGNED_CLASSES][N_SIGNED_CLASSES] = { 24*6dc29354SIlya Leoshkevich /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */ 25*6dc29354SIlya Leoshkevich {/* -inf */ "P(+inf)", "P(+inf)", "Xi: T(dNaN)", "Xi: T(dNaN)", "P(-inf)", "P(-inf)", "P(b)", "Xi: T(b*)"}, 26*6dc29354SIlya Leoshkevich {/* -Fn */ "P(+inf)", "P(a*b)", "P(+0)", "P(-0)", "P(a*b)", "P(-inf)", "P(b)", "Xi: T(b*)"}, 27*6dc29354SIlya Leoshkevich {/* -0 */ "Xi: T(dNaN)", "P(+0)", "P(+0)", "P(-0)", "P(-0)", "Xi: T(dNaN)", "P(b)", "Xi: T(b*)"}, 28*6dc29354SIlya Leoshkevich {/* +0 */ "Xi: T(dNaN)", "P(-0)", "P(-0)", "P(+0)", "P(+0)", "Xi: T(dNaN)", "P(b)", "Xi: T(b*)"}, 29*6dc29354SIlya Leoshkevich {/* +Fn */ "P(-inf)", "P(a*b)", "P(-0)", "P(+0)", "P(a*b)", "P(+inf)", "P(b)", "Xi: T(b*)"}, 30*6dc29354SIlya Leoshkevich {/* +inf */ "P(-inf)", "P(-inf)", "Xi: T(dNaN)", "Xi: T(dNaN)", "P(+inf)", "P(+inf)", "P(b)", "Xi: T(b*)"}, 31*6dc29354SIlya Leoshkevich {/* QNaN */ "P(a)", "P(a)", "P(a)", "P(a)", "P(a)", "P(a)", "P(a)", "Xi: T(b*)"}, 32*6dc29354SIlya Leoshkevich {/* SNaN */ "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)", "Xi: T(a*)"}, 33*6dc29354SIlya Leoshkevich }; 34*6dc29354SIlya Leoshkevich 35*6dc29354SIlya Leoshkevich static const char *table2[N_SIGNED_CLASSES][N_SIGNED_CLASSES] = { 36*6dc29354SIlya Leoshkevich /* -inf -Fn -0 +0 +Fn +inf QNaN SNaN */ 37*6dc29354SIlya Leoshkevich {/* -inf */ "T(-inf)", "T(-inf)", "T(-inf)", "T(-inf)", "T(-inf)", "Xi: T(dNaN)", "T(c)", "Xi: T(c*)"}, 38*6dc29354SIlya Leoshkevich {/* -Fn */ "T(-inf)", "R(p+c)", "R(p)", "R(p)", "R(p+c)", "T(+inf)", "T(c)", "Xi: T(c*)"}, 39*6dc29354SIlya Leoshkevich {/* -0 */ "T(-inf)", "R(c)", "T(-0)", "Rezd", "R(c)", "T(+inf)", "T(c)", "Xi: T(c*)"}, 40*6dc29354SIlya Leoshkevich {/* +0 */ "T(-inf)", "R(c)", "Rezd", "T(+0)", "R(c)", "T(+inf)", "T(c)", "Xi: T(c*)"}, 41*6dc29354SIlya Leoshkevich {/* +Fn */ "T(-inf)", "R(p+c)", "R(p)", "R(p)", "R(p+c)", "T(+inf)", "T(c)", "Xi: T(c*)"}, 42*6dc29354SIlya Leoshkevich {/* +inf */ "Xi: T(dNaN)", "T(+inf)", "T(+inf)", "T(+inf)", "T(+inf)", "T(+inf)", "T(c)", "Xi: T(c*)"}, 43*6dc29354SIlya Leoshkevich {/* QNaN */ "T(p)", "T(p)", "T(p)", "T(p)", "T(p)", "T(p)", "T(p)", "Xi: T(c*)"}, 44*6dc29354SIlya Leoshkevich /* SNaN: can't happen */ 45*6dc29354SIlya Leoshkevich }; 46*6dc29354SIlya Leoshkevich 47*6dc29354SIlya Leoshkevich static void interpret_tables(union val *r, bool *xi, int fmt, 48*6dc29354SIlya Leoshkevich int cls_a, const union val *a, 49*6dc29354SIlya Leoshkevich int cls_b, const union val *b, 50*6dc29354SIlya Leoshkevich int cls_c, const union val *c) 51*6dc29354SIlya Leoshkevich { 52*6dc29354SIlya Leoshkevich const char *spec1 = table1[cls_a][cls_b]; 53*6dc29354SIlya Leoshkevich const char *spec2; 54*6dc29354SIlya Leoshkevich union val p; 55*6dc29354SIlya Leoshkevich int cls_p; 56*6dc29354SIlya Leoshkevich 57*6dc29354SIlya Leoshkevich *xi = false; 58*6dc29354SIlya Leoshkevich 59*6dc29354SIlya Leoshkevich if (strcmp(spec1, "P(-inf)") == 0) { 60*6dc29354SIlya Leoshkevich cls_p = CLASS_MINUS_INF; 61*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "P(+inf)") == 0) { 62*6dc29354SIlya Leoshkevich cls_p = CLASS_PLUS_INF; 63*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "P(-0)") == 0) { 64*6dc29354SIlya Leoshkevich cls_p = CLASS_MINUS_ZERO; 65*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "P(+0)") == 0) { 66*6dc29354SIlya Leoshkevich cls_p = CLASS_PLUS_ZERO; 67*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "P(a)") == 0) { 68*6dc29354SIlya Leoshkevich cls_p = cls_a; 69*6dc29354SIlya Leoshkevich memcpy(&p, a, sizeof(p)); 70*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "P(b)") == 0) { 71*6dc29354SIlya Leoshkevich cls_p = cls_b; 72*6dc29354SIlya Leoshkevich memcpy(&p, b, sizeof(p)); 73*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "P(a*b)") == 0) { 74*6dc29354SIlya Leoshkevich /* 75*6dc29354SIlya Leoshkevich * In the general case splitting fma into multiplication and addition 76*6dc29354SIlya Leoshkevich * doesn't work, but this is the case with our test inputs. 77*6dc29354SIlya Leoshkevich */ 78*6dc29354SIlya Leoshkevich cls_p = cls_a == cls_b ? CLASS_PLUS_FN : CLASS_MINUS_FN; 79*6dc29354SIlya Leoshkevich switch (fmt) { 80*6dc29354SIlya Leoshkevich case 0: 81*6dc29354SIlya Leoshkevich p.e = a->e * b->e; 82*6dc29354SIlya Leoshkevich break; 83*6dc29354SIlya Leoshkevich case 1: 84*6dc29354SIlya Leoshkevich p.d = a->d * b->d; 85*6dc29354SIlya Leoshkevich break; 86*6dc29354SIlya Leoshkevich case 2: 87*6dc29354SIlya Leoshkevich p.x = a->x * b->x; 88*6dc29354SIlya Leoshkevich break; 89*6dc29354SIlya Leoshkevich default: 90*6dc29354SIlya Leoshkevich fprintf(stderr, "Unsupported fmt: %d\n", fmt); 91*6dc29354SIlya Leoshkevich exit(1); 92*6dc29354SIlya Leoshkevich } 93*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "Xi: T(dNaN)") == 0) { 94*6dc29354SIlya Leoshkevich memcpy(r, default_nans[fmt], sizeof(*r)); 95*6dc29354SIlya Leoshkevich *xi = true; 96*6dc29354SIlya Leoshkevich return; 97*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "Xi: T(a*)") == 0) { 98*6dc29354SIlya Leoshkevich memcpy(r, a, sizeof(*r)); 99*6dc29354SIlya Leoshkevich snan_to_qnan(r->buf, fmt); 100*6dc29354SIlya Leoshkevich *xi = true; 101*6dc29354SIlya Leoshkevich return; 102*6dc29354SIlya Leoshkevich } else if (strcmp(spec1, "Xi: T(b*)") == 0) { 103*6dc29354SIlya Leoshkevich memcpy(r, b, sizeof(*r)); 104*6dc29354SIlya Leoshkevich snan_to_qnan(r->buf, fmt); 105*6dc29354SIlya Leoshkevich *xi = true; 106*6dc29354SIlya Leoshkevich return; 107*6dc29354SIlya Leoshkevich } else { 108*6dc29354SIlya Leoshkevich fprintf(stderr, "Unsupported spec1: %s\n", spec1); 109*6dc29354SIlya Leoshkevich exit(1); 110*6dc29354SIlya Leoshkevich } 111*6dc29354SIlya Leoshkevich 112*6dc29354SIlya Leoshkevich spec2 = table2[cls_p][cls_c]; 113*6dc29354SIlya Leoshkevich if (strcmp(spec2, "T(-inf)") == 0) { 114*6dc29354SIlya Leoshkevich memcpy(r, signed_floats[fmt][CLASS_MINUS_INF].v[0], sizeof(*r)); 115*6dc29354SIlya Leoshkevich } else if (strcmp(spec2, "T(+inf)") == 0) { 116*6dc29354SIlya Leoshkevich memcpy(r, signed_floats[fmt][CLASS_PLUS_INF].v[0], sizeof(*r)); 117*6dc29354SIlya Leoshkevich } else if (strcmp(spec2, "T(-0)") == 0) { 118*6dc29354SIlya Leoshkevich memcpy(r, signed_floats[fmt][CLASS_MINUS_ZERO].v[0], sizeof(*r)); 119*6dc29354SIlya Leoshkevich } else if (strcmp(spec2, "T(+0)") == 0 || strcmp(spec2, "Rezd") == 0) { 120*6dc29354SIlya Leoshkevich memcpy(r, signed_floats[fmt][CLASS_PLUS_ZERO].v[0], sizeof(*r)); 121*6dc29354SIlya Leoshkevich } else if (strcmp(spec2, "R(c)") == 0 || strcmp(spec2, "T(c)") == 0) { 122*6dc29354SIlya Leoshkevich memcpy(r, c, sizeof(*r)); 123*6dc29354SIlya Leoshkevich } else if (strcmp(spec2, "R(p)") == 0 || strcmp(spec2, "T(p)") == 0) { 124*6dc29354SIlya Leoshkevich memcpy(r, &p, sizeof(*r)); 125*6dc29354SIlya Leoshkevich } else if (strcmp(spec2, "R(p+c)") == 0 || strcmp(spec2, "T(p+c)") == 0) { 126*6dc29354SIlya Leoshkevich switch (fmt) { 127*6dc29354SIlya Leoshkevich case 0: 128*6dc29354SIlya Leoshkevich r->e = p.e + c->e; 129*6dc29354SIlya Leoshkevich break; 130*6dc29354SIlya Leoshkevich case 1: 131*6dc29354SIlya Leoshkevich r->d = p.d + c->d; 132*6dc29354SIlya Leoshkevich break; 133*6dc29354SIlya Leoshkevich case 2: 134*6dc29354SIlya Leoshkevich r->x = p.x + c->x; 135*6dc29354SIlya Leoshkevich break; 136*6dc29354SIlya Leoshkevich default: 137*6dc29354SIlya Leoshkevich fprintf(stderr, "Unsupported fmt: %d\n", fmt); 138*6dc29354SIlya Leoshkevich exit(1); 139*6dc29354SIlya Leoshkevich } 140*6dc29354SIlya Leoshkevich } else if (strcmp(spec2, "Xi: T(dNaN)") == 0) { 141*6dc29354SIlya Leoshkevich memcpy(r, default_nans[fmt], sizeof(*r)); 142*6dc29354SIlya Leoshkevich *xi = true; 143*6dc29354SIlya Leoshkevich } else if (strcmp(spec2, "Xi: T(c*)") == 0) { 144*6dc29354SIlya Leoshkevich memcpy(r, c, sizeof(*r)); 145*6dc29354SIlya Leoshkevich snan_to_qnan(r->buf, fmt); 146*6dc29354SIlya Leoshkevich *xi = true; 147*6dc29354SIlya Leoshkevich } else { 148*6dc29354SIlya Leoshkevich fprintf(stderr, "Unsupported spec2: %s\n", spec2); 149*6dc29354SIlya Leoshkevich exit(1); 150*6dc29354SIlya Leoshkevich } 151*6dc29354SIlya Leoshkevich } 152*6dc29354SIlya Leoshkevich 153*6dc29354SIlya Leoshkevich struct iter { 154*6dc29354SIlya Leoshkevich int fmt; 155*6dc29354SIlya Leoshkevich int cls[3]; 156*6dc29354SIlya Leoshkevich int val[3]; 157*6dc29354SIlya Leoshkevich }; 158*6dc29354SIlya Leoshkevich 159*6dc29354SIlya Leoshkevich static bool iter_next(struct iter *it) 160*6dc29354SIlya Leoshkevich { 161*6dc29354SIlya Leoshkevich int i; 162*6dc29354SIlya Leoshkevich 163*6dc29354SIlya Leoshkevich for (i = 2; i >= 0; i--) { 164*6dc29354SIlya Leoshkevich if (++it->val[i] != signed_floats[it->fmt][it->cls[i]].n) { 165*6dc29354SIlya Leoshkevich return true; 166*6dc29354SIlya Leoshkevich } 167*6dc29354SIlya Leoshkevich it->val[i] = 0; 168*6dc29354SIlya Leoshkevich 169*6dc29354SIlya Leoshkevich if (++it->cls[i] != N_SIGNED_CLASSES) { 170*6dc29354SIlya Leoshkevich return true; 171*6dc29354SIlya Leoshkevich } 172*6dc29354SIlya Leoshkevich it->cls[i] = 0; 173*6dc29354SIlya Leoshkevich } 174*6dc29354SIlya Leoshkevich 175*6dc29354SIlya Leoshkevich return ++it->fmt != N_FORMATS; 176*6dc29354SIlya Leoshkevich } 177*6dc29354SIlya Leoshkevich 178*6dc29354SIlya Leoshkevich int main(void) 179*6dc29354SIlya Leoshkevich { 180*6dc29354SIlya Leoshkevich int ret = EXIT_SUCCESS; 181*6dc29354SIlya Leoshkevich struct iter it = {}; 182*6dc29354SIlya Leoshkevich 183*6dc29354SIlya Leoshkevich do { 184*6dc29354SIlya Leoshkevich size_t n = float_sizes[it.fmt]; 185*6dc29354SIlya Leoshkevich union val a, b, c, exp, res; 186*6dc29354SIlya Leoshkevich bool xi_exp, xi; 187*6dc29354SIlya Leoshkevich 188*6dc29354SIlya Leoshkevich memcpy(&a, signed_floats[it.fmt][it.cls[0]].v[it.val[0]], sizeof(a)); 189*6dc29354SIlya Leoshkevich memcpy(&b, signed_floats[it.fmt][it.cls[1]].v[it.val[1]], sizeof(b)); 190*6dc29354SIlya Leoshkevich memcpy(&c, signed_floats[it.fmt][it.cls[2]].v[it.val[2]], sizeof(c)); 191*6dc29354SIlya Leoshkevich 192*6dc29354SIlya Leoshkevich interpret_tables(&exp, &xi_exp, it.fmt, 193*6dc29354SIlya Leoshkevich it.cls[1], &b, it.cls[2], &c, it.cls[0], &a); 194*6dc29354SIlya Leoshkevich 195*6dc29354SIlya Leoshkevich memcpy(&res, &a, sizeof(res)); 196*6dc29354SIlya Leoshkevich feclearexcept(FE_ALL_EXCEPT); 197*6dc29354SIlya Leoshkevich switch (it.fmt) { 198*6dc29354SIlya Leoshkevich case 0: 199*6dc29354SIlya Leoshkevich asm("maebr %[a],%[b],%[c]" 200*6dc29354SIlya Leoshkevich : [a] "+f" (res.e) : [b] "f" (b.e), [c] "f" (c.e)); 201*6dc29354SIlya Leoshkevich break; 202*6dc29354SIlya Leoshkevich case 1: 203*6dc29354SIlya Leoshkevich asm("madbr %[a],%[b],%[c]" 204*6dc29354SIlya Leoshkevich : [a] "+f" (res.d) : [b] "f" (b.d), [c] "f" (c.d)); 205*6dc29354SIlya Leoshkevich break; 206*6dc29354SIlya Leoshkevich case 2: 207*6dc29354SIlya Leoshkevich asm("wfmaxb %[a],%[c],%[b],%[a]" 208*6dc29354SIlya Leoshkevich : [a] "+v" (res.x) : [b] "v" (b.x), [c] "v" (c.x)); 209*6dc29354SIlya Leoshkevich break; 210*6dc29354SIlya Leoshkevich default: 211*6dc29354SIlya Leoshkevich fprintf(stderr, "Unsupported fmt: %d\n", it.fmt); 212*6dc29354SIlya Leoshkevich exit(1); 213*6dc29354SIlya Leoshkevich } 214*6dc29354SIlya Leoshkevich xi = fetestexcept(FE_ALL_EXCEPT) == FE_INVALID; 215*6dc29354SIlya Leoshkevich 216*6dc29354SIlya Leoshkevich if (memcmp(&res, &exp, n) != 0 || xi != xi_exp) { 217*6dc29354SIlya Leoshkevich fprintf(stderr, "[ FAILED ] "); 218*6dc29354SIlya Leoshkevich dump_v(stderr, &b, n); 219*6dc29354SIlya Leoshkevich fprintf(stderr, " * "); 220*6dc29354SIlya Leoshkevich dump_v(stderr, &c, n); 221*6dc29354SIlya Leoshkevich fprintf(stderr, " + "); 222*6dc29354SIlya Leoshkevich dump_v(stderr, &a, n); 223*6dc29354SIlya Leoshkevich fprintf(stderr, ": actual="); 224*6dc29354SIlya Leoshkevich dump_v(stderr, &res, n); 225*6dc29354SIlya Leoshkevich fprintf(stderr, "/%d, expected=", (int)xi); 226*6dc29354SIlya Leoshkevich dump_v(stderr, &exp, n); 227*6dc29354SIlya Leoshkevich fprintf(stderr, "/%d\n", (int)xi_exp); 228*6dc29354SIlya Leoshkevich ret = EXIT_FAILURE; 229*6dc29354SIlya Leoshkevich } 230*6dc29354SIlya Leoshkevich } while (iter_next(&it)); 231*6dc29354SIlya Leoshkevich 232*6dc29354SIlya Leoshkevich return ret; 233*6dc29354SIlya Leoshkevich } 234