11bccec25SBlue Swirl /* 21bccec25SBlue Swirl * VIS op helpers 31bccec25SBlue Swirl * 41bccec25SBlue Swirl * Copyright (c) 2003-2005 Fabrice Bellard 51bccec25SBlue Swirl * 61bccec25SBlue Swirl * This library is free software; you can redistribute it and/or 71bccec25SBlue Swirl * modify it under the terms of the GNU Lesser General Public 81bccec25SBlue Swirl * License as published by the Free Software Foundation; either 91bccec25SBlue Swirl * version 2 of the License, or (at your option) any later version. 101bccec25SBlue Swirl * 111bccec25SBlue Swirl * This library is distributed in the hope that it will be useful, 121bccec25SBlue Swirl * but WITHOUT ANY WARRANTY; without even the implied warranty of 131bccec25SBlue Swirl * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 141bccec25SBlue Swirl * Lesser General Public License for more details. 151bccec25SBlue Swirl * 161bccec25SBlue Swirl * You should have received a copy of the GNU Lesser General Public 171bccec25SBlue Swirl * License along with this library; if not, see <http://www.gnu.org/licenses/>. 181bccec25SBlue Swirl */ 191bccec25SBlue Swirl 201bccec25SBlue Swirl #include "cpu.h" 211bccec25SBlue Swirl #include "helper.h" 221bccec25SBlue Swirl 231bccec25SBlue Swirl /* This function uses non-native bit order */ 241bccec25SBlue Swirl #define GET_FIELD(X, FROM, TO) \ 251bccec25SBlue Swirl ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1)) 261bccec25SBlue Swirl 271bccec25SBlue Swirl /* This function uses the order in the manuals, i.e. 
bit 0 is 2^0 */ 281bccec25SBlue Swirl #define GET_FIELD_SP(X, FROM, TO) \ 291bccec25SBlue Swirl GET_FIELD(X, 63 - (TO), 63 - (FROM)) 301bccec25SBlue Swirl 31f027c3b1SRichard Henderson target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) 321bccec25SBlue Swirl { 331bccec25SBlue Swirl return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) | 341bccec25SBlue Swirl (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) | 351bccec25SBlue Swirl (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) | 361bccec25SBlue Swirl (GET_FIELD_SP(pixel_addr, 56, 59) << 13) | 371bccec25SBlue Swirl (GET_FIELD_SP(pixel_addr, 35, 38) << 9) | 381bccec25SBlue Swirl (GET_FIELD_SP(pixel_addr, 13, 16) << 5) | 391bccec25SBlue Swirl (((pixel_addr >> 55) & 1) << 4) | 401bccec25SBlue Swirl (GET_FIELD_SP(pixel_addr, 33, 34) << 2) | 411bccec25SBlue Swirl GET_FIELD_SP(pixel_addr, 11, 12); 421bccec25SBlue Swirl } 431bccec25SBlue Swirl 4403fb8cfcSRichard Henderson uint64_t helper_faligndata(CPUState *env, uint64_t src1, uint64_t src2) 451bccec25SBlue Swirl { 461bccec25SBlue Swirl uint64_t tmp; 471bccec25SBlue Swirl 4803fb8cfcSRichard Henderson tmp = src1 << ((env->gsr & 7) * 8); 491bccec25SBlue Swirl /* on many architectures a shift of 64 does nothing */ 501bccec25SBlue Swirl if ((env->gsr & 7) != 0) { 5103fb8cfcSRichard Henderson tmp |= src2 >> (64 - (env->gsr & 7) * 8); 521bccec25SBlue Swirl } 5303fb8cfcSRichard Henderson return tmp; 541bccec25SBlue Swirl } 551bccec25SBlue Swirl 561bccec25SBlue Swirl #ifdef HOST_WORDS_BIGENDIAN 571bccec25SBlue Swirl #define VIS_B64(n) b[7 - (n)] 581bccec25SBlue Swirl #define VIS_W64(n) w[3 - (n)] 591bccec25SBlue Swirl #define VIS_SW64(n) sw[3 - (n)] 601bccec25SBlue Swirl #define VIS_L64(n) l[1 - (n)] 611bccec25SBlue Swirl #define VIS_B32(n) b[3 - (n)] 621bccec25SBlue Swirl #define VIS_W32(n) w[1 - (n)] 631bccec25SBlue Swirl #else 641bccec25SBlue Swirl #define VIS_B64(n) b[n] 651bccec25SBlue Swirl #define VIS_W64(n) w[n] 
661bccec25SBlue Swirl #define VIS_SW64(n) sw[n] 671bccec25SBlue Swirl #define VIS_L64(n) l[n] 681bccec25SBlue Swirl #define VIS_B32(n) b[n] 691bccec25SBlue Swirl #define VIS_W32(n) w[n] 701bccec25SBlue Swirl #endif 711bccec25SBlue Swirl 721bccec25SBlue Swirl typedef union { 731bccec25SBlue Swirl uint8_t b[8]; 741bccec25SBlue Swirl uint16_t w[4]; 751bccec25SBlue Swirl int16_t sw[4]; 761bccec25SBlue Swirl uint32_t l[2]; 771bccec25SBlue Swirl uint64_t ll; 781bccec25SBlue Swirl float64 d; 791bccec25SBlue Swirl } VIS64; 801bccec25SBlue Swirl 811bccec25SBlue Swirl typedef union { 821bccec25SBlue Swirl uint8_t b[4]; 831bccec25SBlue Swirl uint16_t w[2]; 841bccec25SBlue Swirl uint32_t l; 851bccec25SBlue Swirl float32 f; 861bccec25SBlue Swirl } VIS32; 871bccec25SBlue Swirl 88f027c3b1SRichard Henderson uint64_t helper_fpmerge(uint64_t src1, uint64_t src2) 891bccec25SBlue Swirl { 901bccec25SBlue Swirl VIS64 s, d; 911bccec25SBlue Swirl 9203fb8cfcSRichard Henderson s.ll = src1; 9303fb8cfcSRichard Henderson d.ll = src2; 941bccec25SBlue Swirl 951bccec25SBlue Swirl /* Reverse calculation order to handle overlap */ 961bccec25SBlue Swirl d.VIS_B64(7) = s.VIS_B64(3); 971bccec25SBlue Swirl d.VIS_B64(6) = d.VIS_B64(3); 981bccec25SBlue Swirl d.VIS_B64(5) = s.VIS_B64(2); 991bccec25SBlue Swirl d.VIS_B64(4) = d.VIS_B64(2); 1001bccec25SBlue Swirl d.VIS_B64(3) = s.VIS_B64(1); 1011bccec25SBlue Swirl d.VIS_B64(2) = d.VIS_B64(1); 1021bccec25SBlue Swirl d.VIS_B64(1) = s.VIS_B64(0); 1031bccec25SBlue Swirl /* d.VIS_B64(0) = d.VIS_B64(0); */ 1041bccec25SBlue Swirl 10503fb8cfcSRichard Henderson return d.ll; 1061bccec25SBlue Swirl } 1071bccec25SBlue Swirl 108f027c3b1SRichard Henderson uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2) 1091bccec25SBlue Swirl { 1101bccec25SBlue Swirl VIS64 s, d; 1111bccec25SBlue Swirl uint32_t tmp; 1121bccec25SBlue Swirl 11303fb8cfcSRichard Henderson s.ll = src1; 11403fb8cfcSRichard Henderson d.ll = src2; 1151bccec25SBlue Swirl 1161bccec25SBlue Swirl #define PMUL(r) 
\ 1171bccec25SBlue Swirl tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r); \ 1181bccec25SBlue Swirl if ((tmp & 0xff) > 0x7f) { \ 1191bccec25SBlue Swirl tmp += 0x100; \ 1201bccec25SBlue Swirl } \ 1211bccec25SBlue Swirl d.VIS_W64(r) = tmp >> 8; 1221bccec25SBlue Swirl 1231bccec25SBlue Swirl PMUL(0); 1241bccec25SBlue Swirl PMUL(1); 1251bccec25SBlue Swirl PMUL(2); 1261bccec25SBlue Swirl PMUL(3); 1271bccec25SBlue Swirl #undef PMUL 1281bccec25SBlue Swirl 12903fb8cfcSRichard Henderson return d.ll; 1301bccec25SBlue Swirl } 1311bccec25SBlue Swirl 132f027c3b1SRichard Henderson uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2) 1331bccec25SBlue Swirl { 1341bccec25SBlue Swirl VIS64 s, d; 1351bccec25SBlue Swirl uint32_t tmp; 1361bccec25SBlue Swirl 13703fb8cfcSRichard Henderson s.ll = src1; 13803fb8cfcSRichard Henderson d.ll = src2; 1391bccec25SBlue Swirl 1401bccec25SBlue Swirl #define PMUL(r) \ 1411bccec25SBlue Swirl tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r); \ 1421bccec25SBlue Swirl if ((tmp & 0xff) > 0x7f) { \ 1431bccec25SBlue Swirl tmp += 0x100; \ 1441bccec25SBlue Swirl } \ 1451bccec25SBlue Swirl d.VIS_W64(r) = tmp >> 8; 1461bccec25SBlue Swirl 1471bccec25SBlue Swirl PMUL(0); 1481bccec25SBlue Swirl PMUL(1); 1491bccec25SBlue Swirl PMUL(2); 1501bccec25SBlue Swirl PMUL(3); 1511bccec25SBlue Swirl #undef PMUL 1521bccec25SBlue Swirl 15303fb8cfcSRichard Henderson return d.ll; 1541bccec25SBlue Swirl } 1551bccec25SBlue Swirl 156f027c3b1SRichard Henderson uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2) 1571bccec25SBlue Swirl { 1581bccec25SBlue Swirl VIS64 s, d; 1591bccec25SBlue Swirl uint32_t tmp; 1601bccec25SBlue Swirl 16103fb8cfcSRichard Henderson s.ll = src1; 16203fb8cfcSRichard Henderson d.ll = src2; 1631bccec25SBlue Swirl 1641bccec25SBlue Swirl #define PMUL(r) \ 1651bccec25SBlue Swirl tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r); \ 1661bccec25SBlue Swirl if ((tmp & 0xff) > 0x7f) { \ 1671bccec25SBlue Swirl tmp += 0x100; \ 1681bccec25SBlue Swirl 
} \ 1691bccec25SBlue Swirl d.VIS_W64(r) = tmp >> 8; 1701bccec25SBlue Swirl 1711bccec25SBlue Swirl PMUL(0); 1721bccec25SBlue Swirl PMUL(1); 1731bccec25SBlue Swirl PMUL(2); 1741bccec25SBlue Swirl PMUL(3); 1751bccec25SBlue Swirl #undef PMUL 1761bccec25SBlue Swirl 17703fb8cfcSRichard Henderson return d.ll; 1781bccec25SBlue Swirl } 1791bccec25SBlue Swirl 180f027c3b1SRichard Henderson uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2) 1811bccec25SBlue Swirl { 1821bccec25SBlue Swirl VIS64 s, d; 1831bccec25SBlue Swirl uint32_t tmp; 1841bccec25SBlue Swirl 18503fb8cfcSRichard Henderson s.ll = src1; 18603fb8cfcSRichard Henderson d.ll = src2; 1871bccec25SBlue Swirl 1881bccec25SBlue Swirl #define PMUL(r) \ 1891bccec25SBlue Swirl tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \ 1901bccec25SBlue Swirl if ((tmp & 0xff) > 0x7f) { \ 1911bccec25SBlue Swirl tmp += 0x100; \ 1921bccec25SBlue Swirl } \ 1931bccec25SBlue Swirl d.VIS_W64(r) = tmp >> 8; 1941bccec25SBlue Swirl 1951bccec25SBlue Swirl PMUL(0); 1961bccec25SBlue Swirl PMUL(1); 1971bccec25SBlue Swirl PMUL(2); 1981bccec25SBlue Swirl PMUL(3); 1991bccec25SBlue Swirl #undef PMUL 2001bccec25SBlue Swirl 20103fb8cfcSRichard Henderson return d.ll; 2021bccec25SBlue Swirl } 2031bccec25SBlue Swirl 204f027c3b1SRichard Henderson uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2) 2051bccec25SBlue Swirl { 2061bccec25SBlue Swirl VIS64 s, d; 2071bccec25SBlue Swirl uint32_t tmp; 2081bccec25SBlue Swirl 20903fb8cfcSRichard Henderson s.ll = src1; 21003fb8cfcSRichard Henderson d.ll = src2; 2111bccec25SBlue Swirl 2121bccec25SBlue Swirl #define PMUL(r) \ 2131bccec25SBlue Swirl tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \ 2141bccec25SBlue Swirl if ((tmp & 0xff) > 0x7f) { \ 2151bccec25SBlue Swirl tmp += 0x100; \ 2161bccec25SBlue Swirl } \ 2171bccec25SBlue Swirl d.VIS_W64(r) = tmp >> 8; 2181bccec25SBlue Swirl 2191bccec25SBlue Swirl PMUL(0); 2201bccec25SBlue Swirl PMUL(1); 2211bccec25SBlue Swirl PMUL(2); 
2221bccec25SBlue Swirl PMUL(3); 2231bccec25SBlue Swirl #undef PMUL 2241bccec25SBlue Swirl 22503fb8cfcSRichard Henderson return d.ll; 2261bccec25SBlue Swirl } 2271bccec25SBlue Swirl 228f027c3b1SRichard Henderson uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2) 2291bccec25SBlue Swirl { 2301bccec25SBlue Swirl VIS64 s, d; 2311bccec25SBlue Swirl uint32_t tmp; 2321bccec25SBlue Swirl 23303fb8cfcSRichard Henderson s.ll = src1; 23403fb8cfcSRichard Henderson d.ll = src2; 2351bccec25SBlue Swirl 2361bccec25SBlue Swirl #define PMUL(r) \ 2371bccec25SBlue Swirl tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \ 2381bccec25SBlue Swirl if ((tmp & 0xff) > 0x7f) { \ 2391bccec25SBlue Swirl tmp += 0x100; \ 2401bccec25SBlue Swirl } \ 2411bccec25SBlue Swirl d.VIS_L64(r) = tmp; 2421bccec25SBlue Swirl 2431bccec25SBlue Swirl /* Reverse calculation order to handle overlap */ 2441bccec25SBlue Swirl PMUL(1); 2451bccec25SBlue Swirl PMUL(0); 2461bccec25SBlue Swirl #undef PMUL 2471bccec25SBlue Swirl 24803fb8cfcSRichard Henderson return d.ll; 2491bccec25SBlue Swirl } 2501bccec25SBlue Swirl 251f027c3b1SRichard Henderson uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2) 2521bccec25SBlue Swirl { 2531bccec25SBlue Swirl VIS64 s, d; 2541bccec25SBlue Swirl uint32_t tmp; 2551bccec25SBlue Swirl 25603fb8cfcSRichard Henderson s.ll = src1; 25703fb8cfcSRichard Henderson d.ll = src2; 2581bccec25SBlue Swirl 2591bccec25SBlue Swirl #define PMUL(r) \ 2601bccec25SBlue Swirl tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \ 2611bccec25SBlue Swirl if ((tmp & 0xff) > 0x7f) { \ 2621bccec25SBlue Swirl tmp += 0x100; \ 2631bccec25SBlue Swirl } \ 2641bccec25SBlue Swirl d.VIS_L64(r) = tmp; 2651bccec25SBlue Swirl 2661bccec25SBlue Swirl /* Reverse calculation order to handle overlap */ 2671bccec25SBlue Swirl PMUL(1); 2681bccec25SBlue Swirl PMUL(0); 2691bccec25SBlue Swirl #undef PMUL 2701bccec25SBlue Swirl 27103fb8cfcSRichard Henderson return d.ll; 2721bccec25SBlue Swirl } 
/*
 * Lane helpers for the packed-integer operations below.  Lane 0 is the
 * least significant element.  Lanes are extracted with shifts, so these
 * helpers give the same result on any host byte order.
 */
static inline uint16_t vis_lane16(uint64_t v, unsigned lane)
{
    return (uint16_t)(v >> (16 * lane));
}

static inline uint32_t vis_lane32(uint64_t v, unsigned lane)
{
    return (uint32_t)(v >> (32 * lane));
}

/* Clamp v to the inclusive range lo..hi. */
static inline int64_t vis_clamp(int64_t v, int64_t lo, int64_t hi)
{
    return v < lo ? lo : (v > hi ? hi : v);
}

/*
 * Expand the four bytes of the low 32 bits of src1 into four 16-bit lanes,
 * each holding its byte shifted left by 4.  Every lane of the result is
 * overwritten, so src2 does not influence the value returned.
 */
uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
{
    uint64_t ret = 0;
    unsigned lane;

    (void)src2;

    for (lane = 0; lane < 4; lane++) {
        uint64_t byte = (src1 >> (8 * lane)) & 0xff;

        ret |= (byte << 4) << (16 * lane);
    }

    return ret;
}

/*
 * Generate the four lane-wise arithmetic helpers for operation F:
 *   name##16   four 16-bit lanes of a 64-bit value
 *   name##16s  two 16-bit lanes of a 32-bit value
 *   name##32   two 32-bit lanes of a 64-bit value
 *   name##32s  a single 32-bit value
 * Each lane is computed as F(src2 lane, src1 lane) and truncated to the
 * lane width.
 */
#define VIS_HELPER(name, F)                                             \
    uint64_t name##16(uint64_t src1, uint64_t src2)                     \
    {                                                                   \
        uint64_t ret = 0;                                               \
        unsigned lane;                                                  \
                                                                        \
        for (lane = 0; lane < 4; lane++) {                              \
            uint16_t r = F(vis_lane16(src2, lane),                      \
                           vis_lane16(src1, lane));                     \
            ret |= (uint64_t)r << (16 * lane);                          \
        }                                                               \
                                                                        \
        return ret;                                                     \
    }                                                                   \
                                                                        \
    uint32_t name##16s(uint32_t src1, uint32_t src2)                    \
    {                                                                   \
        uint32_t ret = 0;                                               \
        unsigned lane;                                                  \
                                                                        \
        for (lane = 0; lane < 2; lane++) {                              \
            uint16_t r = F(vis_lane16(src2, lane),                      \
                           vis_lane16(src1, lane));                     \
            ret |= (uint32_t)r << (16 * lane);                          \
        }                                                               \
                                                                        \
        return ret;                                                     \
    }                                                                   \
                                                                        \
    uint64_t name##32(uint64_t src1, uint64_t src2)                     \
    {                                                                   \
        uint64_t ret = 0;                                               \
        unsigned lane;                                                  \
                                                                        \
        for (lane = 0; lane < 2; lane++) {                              \
            uint32_t r = F(vis_lane32(src2, lane),                      \
                           vis_lane32(src1, lane));                     \
            ret |= (uint64_t)r << (32 * lane);                          \
        }                                                               \
                                                                        \
        return ret;                                                     \
    }                                                                   \
                                                                        \
    uint32_t name##32s(uint32_t src1, uint32_t src2)                    \
    {                                                                   \
        return F(src2, src1);                                           \
    }

#define FADD(a, b) ((a) + (b))
#define FSUB(a, b) ((a) - (b))
VIS_HELPER(helper_fpadd, FADD)
VIS_HELPER(helper_fpsub, FSUB)

/*
 * Generate the lane-wise comparison helpers for predicate F:
 *   name##16   compares four 16-bit lanes, result bits 3:0
 *   name##32   compares two 32-bit lanes, result bits 1:0
 * Bit n of the result is set when F(src1 lane n, src2 lane n) holds; all
 * other result bits are zero.
 *
 * NOTE(review): lanes are compared as unsigned values here -- confirm
 * whether the instructions are meant to compare signed lanes.
 */
#define VIS_CMPHELPER(name, F)                                          \
    uint64_t name##16(uint64_t src1, uint64_t src2)                     \
    {                                                                   \
        uint64_t ret = 0;                                               \
        unsigned lane;                                                  \
                                                                        \
        for (lane = 0; lane < 4; lane++) {                              \
            if (F(vis_lane16(src1, lane), vis_lane16(src2, lane))) {    \
                ret |= (uint64_t)1 << lane;                             \
            }                                                           \
        }                                                               \
                                                                        \
        return ret;                                                     \
    }                                                                   \
                                                                        \
    uint64_t name##32(uint64_t src1, uint64_t src2)                     \
    {                                                                   \
        uint64_t ret = 0;                                               \
        unsigned lane;                                                  \
                                                                        \
        for (lane = 0; lane < 2; lane++) {                              \
            if (F(vis_lane32(src1, lane), vis_lane32(src2, lane))) {    \
                ret |= (uint64_t)1 << lane;                             \
            }                                                           \
        }                                                               \
                                                                        \
        return ret;                                                     \
    }

#define FCMPGT(a, b) ((a) > (b))
#define FCMPEQ(a, b) ((a) == (b))
#define FCMPLE(a, b) ((a) <= (b))
#define FCMPNE(a, b) ((a) != (b))

VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
VIS_CMPHELPER(helper_fcmple, FCMPLE)
VIS_CMPHELPER(helper_fcmpne, FCMPNE)

/*
 * Add to sum the absolute differences of the eight unsigned byte pairs of
 * src1 and src2, and return the new total.
 */
uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
{
    int i;

    for (i = 0; i < 8; i++) {
        int a = (src1 >> (56 - (i * 8))) & 0xff;
        int b = (src2 >> (56 - (i * 8))) & 0xff;

        sum += (a > b) ? (a - b) : (b - a);
    }

    return sum;
}

/*
 * Pack the four signed 16-bit lanes of rs2 into four unsigned bytes.
 * Each lane is scaled up by 2^scale (scale = gsr bits 6:3), shifted right
 * by 7 and saturated to 0..255.  Lane n lands in byte n of the result.
 */
uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0xf;
    uint32_t ret = 0;
    int byte;

    for (byte = 0; byte < 4; byte++) {
        int16_t src = rs2 >> (byte * 16);
        /* Multiply rather than shift: left-shifting a negative value is
           undefined.  |src| * 2^15 always fits in 32 bits. */
        int32_t scaled = (int32_t)src * (1 << scale);
        int32_t from_fixed = scaled >> 7;
        uint32_t val = vis_clamp(from_fixed, 0, 255);

        ret |= val << (8 * byte);
    }

    return ret;
}

/*
 * Pack the two signed 32-bit lanes of rs2 into unsigned bytes and merge
 * them with rs1.  rs1 is shifted left by 8 with the low byte of each
 * 32-bit half cleared.  Each rs2 lane is scaled up by 2^scale
 * (scale = gsr bits 7:3), shifted right by 23, saturated to 0..255 and
 * placed in the low byte of the matching 32-bit half.
 */
uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint64_t ret;
    int word;

    ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
    for (word = 0; word < 2; word++) {
        int32_t src = rs2 >> (word * 32);
        /* Widen before scaling; multiply to avoid shifting a negative */
        int64_t scaled = (int64_t)src * ((int64_t)1 << scale);
        int64_t from_fixed = scaled >> 23;
        uint64_t val = vis_clamp(from_fixed, 0, 255);

        ret |= val << (32 * word);
    }

    return ret;
}

/*
 * Pack the two signed 32-bit lanes of rs2 into two signed 16-bit lanes.
 * Each lane is scaled up by 2^scale (scale = gsr bits 7:3), shifted right
 * by 16 and saturated to -32768..32767.  Lane n lands in 16-bit lane n of
 * the result.
 */
uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint32_t ret = 0;
    int word;

    for (word = 0; word < 2; word++) {
        int32_t src = rs2 >> (word * 32);
        /*
         * Widen to 64 bits before scaling.  Scaling in 32 bits discards
         * the high bits, so values that should saturate wrapped instead.
         */
        int64_t scaled = (int64_t)src * ((int64_t)1 << scale);
        int64_t from_fixed = scaled >> 16;
        uint32_t val = vis_clamp(from_fixed, -32768, 32767);

        ret |= (val & 0xffff) << (word * 16);
    }

    return ret;
}

/*
 * Build a 64-bit value from bytes selected out of src1:src2.
 *
 * The 16 source bytes are numbered 0..15 from the most significant byte of
 * src1 down to the least significant byte of src2.  The selector is the
 * upper 32 bits of gsr, read as eight 4-bit indices: the nibble in bits
 * 31:28 picks result byte 0 (least significant), the next nibble picks
 * result byte 1, and so on up to bits 3:0 for result byte 7.
 *
 * NOTE(review): confirm this nibble-to-result-byte order against the
 * BSHUFFLE definition in the VIS 2 specification.
 */
uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
{
    uint32_t mask = gsr >> 32;
    uint64_t ret = 0;
    uint32_t i;

    for (i = 0; i < 8; ++i) {
        unsigned e = (mask >> (28 - i * 4)) & 0xf;
        /* Indices 0..7 address src1, 8..15 address src2 */
        uint64_t src = (e < 8) ? src1 : src2;
        uint64_t byte = (src >> (8 * (7 - (e & 7)))) & 0xff;

        ret |= byte << (8 * i);
    }

    return ret;
}