11bccec25SBlue Swirl /* 21bccec25SBlue Swirl * VIS op helpers 31bccec25SBlue Swirl * 41bccec25SBlue Swirl * Copyright (c) 2003-2005 Fabrice Bellard 51bccec25SBlue Swirl * 61bccec25SBlue Swirl * This library is free software; you can redistribute it and/or 71bccec25SBlue Swirl * modify it under the terms of the GNU Lesser General Public 81bccec25SBlue Swirl * License as published by the Free Software Foundation; either 95650b549SChetan Pant * version 2.1 of the License, or (at your option) any later version. 101bccec25SBlue Swirl * 111bccec25SBlue Swirl * This library is distributed in the hope that it will be useful, 121bccec25SBlue Swirl * but WITHOUT ANY WARRANTY; without even the implied warranty of 131bccec25SBlue Swirl * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 141bccec25SBlue Swirl * Lesser General Public License for more details. 151bccec25SBlue Swirl * 161bccec25SBlue Swirl * You should have received a copy of the GNU Lesser General Public 171bccec25SBlue Swirl * License along with this library; if not, see <http://www.gnu.org/licenses/>. 181bccec25SBlue Swirl */ 191bccec25SBlue Swirl 20db5ebe5fSPeter Maydell #include "qemu/osdep.h" 211bccec25SBlue Swirl #include "cpu.h" 222ef6175aSRichard Henderson #include "exec/helper-proto.h" 231bccec25SBlue Swirl 241bccec25SBlue Swirl /* This function uses non-native bit order */ 251bccec25SBlue Swirl #define GET_FIELD(X, FROM, TO) \ 261bccec25SBlue Swirl ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1)) 271bccec25SBlue Swirl 281bccec25SBlue Swirl /* This function uses the order in the manuals, i.e. bit 0 is 2^0 */ 291bccec25SBlue Swirl #define GET_FIELD_SP(X, FROM, TO) \ 301bccec25SBlue Swirl GET_FIELD(X, 63 - (TO), 63 - (FROM)) 311bccec25SBlue Swirl 32f027c3b1SRichard Henderson target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) 331bccec25SBlue Swirl { 341bccec25SBlue Swirl return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) | 351bccec25SBlue Swirl (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) | 361bccec25SBlue Swirl (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) | 371bccec25SBlue Swirl (GET_FIELD_SP(pixel_addr, 56, 59) << 13) | 381bccec25SBlue Swirl (GET_FIELD_SP(pixel_addr, 35, 38) << 9) | 391bccec25SBlue Swirl (GET_FIELD_SP(pixel_addr, 13, 16) << 5) | 401bccec25SBlue Swirl (((pixel_addr >> 55) & 1) << 4) | 411bccec25SBlue Swirl (GET_FIELD_SP(pixel_addr, 33, 34) << 2) | 421bccec25SBlue Swirl GET_FIELD_SP(pixel_addr, 11, 12); 431bccec25SBlue Swirl } 441bccec25SBlue Swirl 45e03b5686SMarc-André Lureau #if HOST_BIG_ENDIAN 461bccec25SBlue Swirl #define VIS_B64(n) b[7 - (n)] 47*d6f898cfSRichard Henderson #define VIS_SB64(n) sb[7 - (n)] 481bccec25SBlue Swirl #define VIS_W64(n) w[3 - (n)] 491bccec25SBlue Swirl #define VIS_SW64(n) sw[3 - (n)] 501bccec25SBlue Swirl #define VIS_L64(n) l[1 - (n)] 511bccec25SBlue Swirl #define VIS_B32(n) b[3 - (n)] 521bccec25SBlue Swirl #define VIS_W32(n) w[1 - (n)] 531bccec25SBlue Swirl #else 541bccec25SBlue Swirl #define VIS_B64(n) b[n] 55*d6f898cfSRichard Henderson #define VIS_SB64(n) sb[n] 561bccec25SBlue Swirl #define VIS_W64(n) w[n] 571bccec25SBlue Swirl #define VIS_SW64(n) sw[n] 581bccec25SBlue Swirl #define VIS_L64(n) l[n] 591bccec25SBlue Swirl #define VIS_B32(n) b[n] 601bccec25SBlue Swirl #define VIS_W32(n) w[n] 611bccec25SBlue Swirl #endif 621bccec25SBlue Swirl 631bccec25SBlue Swirl typedef union { 641bccec25SBlue Swirl uint8_t b[8]; 65*d6f898cfSRichard Henderson int8_t sb[8]; 661bccec25SBlue Swirl uint16_t w[4]; 671bccec25SBlue Swirl int16_t sw[4]; 681bccec25SBlue Swirl uint32_t l[2]; 691bccec25SBlue Swirl uint64_t ll; 701bccec25SBlue Swirl float64 d; 711bccec25SBlue Swirl } VIS64; 721bccec25SBlue Swirl 731bccec25SBlue Swirl typedef union { 741bccec25SBlue Swirl uint8_t b[4]; 751bccec25SBlue Swirl uint16_t w[2]; 761bccec25SBlue Swirl uint32_t l; 771bccec25SBlue Swirl float32 f; 781bccec25SBlue Swirl } VIS32; 791bccec25SBlue Swirl 80d3ef26afSRichard Henderson uint64_t helper_fpmerge(uint32_t src1, uint32_t src2) 811bccec25SBlue Swirl { 82d3ef26afSRichard Henderson VIS32 s1, s2; 83d3ef26afSRichard Henderson VIS64 d; 841bccec25SBlue Swirl 85d3ef26afSRichard Henderson s1.l = src1; 86d3ef26afSRichard Henderson s2.l = src2; 87d3ef26afSRichard Henderson d.ll = 0; 881bccec25SBlue Swirl 89d3ef26afSRichard Henderson d.VIS_B64(7) = s1.VIS_B32(3); 90d3ef26afSRichard Henderson d.VIS_B64(6) = s2.VIS_B32(3); 91d3ef26afSRichard Henderson d.VIS_B64(5) = s1.VIS_B32(2); 92d3ef26afSRichard Henderson d.VIS_B64(4) = s2.VIS_B32(2); 93d3ef26afSRichard Henderson d.VIS_B64(3) = s1.VIS_B32(1); 94d3ef26afSRichard Henderson d.VIS_B64(2) = s2.VIS_B32(1); 95d3ef26afSRichard Henderson d.VIS_B64(1) = s1.VIS_B32(0); 96d3ef26afSRichard Henderson d.VIS_B64(0) = s2.VIS_B32(0); 971bccec25SBlue Swirl 9803fb8cfcSRichard Henderson return d.ll; 991bccec25SBlue Swirl } 1001bccec25SBlue Swirl 101*d6f898cfSRichard Henderson static inline int do_ms16b(int x, int y) 102*d6f898cfSRichard Henderson { 103*d6f898cfSRichard Henderson return ((x * y) + 0x80) >> 8; 104*d6f898cfSRichard Henderson } 105*d6f898cfSRichard Henderson 1069157dcccSRichard Henderson uint64_t helper_fmul8x16(uint32_t src1, uint64_t src2) 1071bccec25SBlue Swirl { 1089157dcccSRichard Henderson VIS64 d; 1099157dcccSRichard Henderson VIS32 s; 1101bccec25SBlue Swirl 1119157dcccSRichard Henderson s.l = src1; 11203fb8cfcSRichard Henderson d.ll = src2; 1131bccec25SBlue Swirl 114*d6f898cfSRichard Henderson d.VIS_W64(0) = do_ms16b(s.VIS_B32(0), d.VIS_SW64(0)); 115*d6f898cfSRichard Henderson d.VIS_W64(1) = do_ms16b(s.VIS_B32(1), d.VIS_SW64(1)); 116*d6f898cfSRichard Henderson d.VIS_W64(2) = do_ms16b(s.VIS_B32(2), d.VIS_SW64(2)); 117*d6f898cfSRichard Henderson d.VIS_W64(3) = do_ms16b(s.VIS_B32(3), d.VIS_SW64(3)); 1181bccec25SBlue Swirl 11903fb8cfcSRichard Henderson return d.ll; 1201bccec25SBlue Swirl } 1211bccec25SBlue Swirl 122a859602cSRichard Henderson uint64_t helper_fmul8x16a(uint32_t src1, int32_t src2) 1231bccec25SBlue Swirl { 124a859602cSRichard Henderson VIS32 s; 125a859602cSRichard Henderson VIS64 d; 1261bccec25SBlue Swirl 127a859602cSRichard Henderson s.l = src1; 128a859602cSRichard Henderson d.ll = 0; 1291bccec25SBlue Swirl 130*d6f898cfSRichard Henderson d.VIS_W64(0) = do_ms16b(s.VIS_B32(0), src2); 131*d6f898cfSRichard Henderson d.VIS_W64(1) = do_ms16b(s.VIS_B32(1), src2); 132*d6f898cfSRichard Henderson d.VIS_W64(2) = do_ms16b(s.VIS_B32(2), src2); 133*d6f898cfSRichard Henderson d.VIS_W64(3) = do_ms16b(s.VIS_B32(3), src2); 1341bccec25SBlue Swirl 13503fb8cfcSRichard Henderson return d.ll; 1361bccec25SBlue Swirl } 1371bccec25SBlue Swirl 138f027c3b1SRichard Henderson uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2) 1391bccec25SBlue Swirl { 1401bccec25SBlue Swirl VIS64 s, d; 1411bccec25SBlue Swirl 14203fb8cfcSRichard Henderson s.ll = src1; 14303fb8cfcSRichard Henderson d.ll = src2; 1441bccec25SBlue Swirl 145*d6f898cfSRichard Henderson d.VIS_W64(0) = do_ms16b(s.VIS_SB64(1), d.VIS_SW64(0)); 146*d6f898cfSRichard Henderson d.VIS_W64(1) = do_ms16b(s.VIS_SB64(3), d.VIS_SW64(1)); 147*d6f898cfSRichard Henderson d.VIS_W64(2) = do_ms16b(s.VIS_SB64(5), d.VIS_SW64(2)); 148*d6f898cfSRichard Henderson d.VIS_W64(3) = do_ms16b(s.VIS_SB64(7), d.VIS_SW64(3)); 1491bccec25SBlue Swirl 15003fb8cfcSRichard Henderson return d.ll; 1511bccec25SBlue Swirl } 1521bccec25SBlue Swirl 153f027c3b1SRichard Henderson uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2) 1541bccec25SBlue Swirl { 1551bccec25SBlue Swirl VIS64 s, d; 1561bccec25SBlue Swirl 15703fb8cfcSRichard Henderson s.ll = src1; 15803fb8cfcSRichard Henderson d.ll = src2; 1591bccec25SBlue Swirl 160*d6f898cfSRichard Henderson d.VIS_W64(0) = do_ms16b(s.VIS_B64(0), d.VIS_SW64(0)); 161*d6f898cfSRichard Henderson d.VIS_W64(1) = do_ms16b(s.VIS_B64(2), d.VIS_SW64(1)); 162*d6f898cfSRichard Henderson d.VIS_W64(2) = do_ms16b(s.VIS_B64(4), d.VIS_SW64(2)); 163*d6f898cfSRichard Henderson d.VIS_W64(3) = do_ms16b(s.VIS_B64(6), d.VIS_SW64(3)); 1641bccec25SBlue Swirl 16503fb8cfcSRichard Henderson return d.ll; 1661bccec25SBlue Swirl } 1671bccec25SBlue Swirl 1687b616f36SRichard Henderson uint64_t helper_fexpand(uint32_t src2) 1691bccec25SBlue Swirl { 1701bccec25SBlue Swirl VIS32 s; 1711bccec25SBlue Swirl VIS64 d; 1721bccec25SBlue Swirl 1737b616f36SRichard Henderson s.l = src2; 1747b616f36SRichard Henderson d.ll = 0; 1751bccec25SBlue Swirl d.VIS_W64(0) = s.VIS_B32(0) << 4; 1761bccec25SBlue Swirl d.VIS_W64(1) = s.VIS_B32(1) << 4; 1771bccec25SBlue Swirl d.VIS_W64(2) = s.VIS_B32(2) << 4; 1781bccec25SBlue Swirl d.VIS_W64(3) = s.VIS_B32(3) << 4; 1791bccec25SBlue Swirl 18003fb8cfcSRichard Henderson return d.ll; 1811bccec25SBlue Swirl } 1821bccec25SBlue Swirl 1831bccec25SBlue Swirl #define VIS_CMPHELPER(name, F) \ 184f027c3b1SRichard Henderson uint64_t name##16(uint64_t src1, uint64_t src2) \ 1851bccec25SBlue Swirl { \ 1861bccec25SBlue Swirl VIS64 s, d; \ 1871bccec25SBlue Swirl \ 18803fb8cfcSRichard Henderson s.ll = src1; \ 18903fb8cfcSRichard Henderson d.ll = src2; \ 1901bccec25SBlue Swirl \ 1911bccec25SBlue Swirl d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0; \ 1921bccec25SBlue Swirl d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0; \ 1931bccec25SBlue Swirl d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0; \ 1941bccec25SBlue Swirl d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0; \ 1951bccec25SBlue Swirl d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0; \ 1961bccec25SBlue Swirl \ 1971bccec25SBlue Swirl return d.ll; \ 1981bccec25SBlue Swirl } \ 1991bccec25SBlue Swirl \ 200f027c3b1SRichard Henderson uint64_t name##32(uint64_t src1, uint64_t src2) \ 2011bccec25SBlue Swirl { \ 2021bccec25SBlue Swirl VIS64 s, d; \ 2031bccec25SBlue Swirl \ 20403fb8cfcSRichard Henderson s.ll = src1; \ 20503fb8cfcSRichard Henderson d.ll = src2; \ 2061bccec25SBlue Swirl \ 2071bccec25SBlue Swirl d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0; \ 2081bccec25SBlue Swirl d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0; \ 2091bccec25SBlue Swirl d.VIS_L64(1) = 0; \ 2101bccec25SBlue Swirl \ 2111bccec25SBlue Swirl return d.ll; \ 2121bccec25SBlue Swirl } 2131bccec25SBlue Swirl 2141bccec25SBlue Swirl #define FCMPGT(a, b) ((a) > (b)) 2151bccec25SBlue Swirl #define FCMPEQ(a, b) ((a) == (b)) 2161bccec25SBlue Swirl #define FCMPLE(a, b) ((a) <= (b)) 2171bccec25SBlue Swirl #define FCMPNE(a, b) ((a) != (b)) 2181bccec25SBlue Swirl 2191bccec25SBlue Swirl VIS_CMPHELPER(helper_fcmpgt, FCMPGT) 2201bccec25SBlue Swirl VIS_CMPHELPER(helper_fcmpeq, FCMPEQ) 2211bccec25SBlue Swirl VIS_CMPHELPER(helper_fcmple, FCMPLE) 2221bccec25SBlue Swirl VIS_CMPHELPER(helper_fcmpne, FCMPNE) 223f888300bSRichard Henderson 224f888300bSRichard Henderson uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2) 225f888300bSRichard Henderson { 226f888300bSRichard Henderson int i; 227f888300bSRichard Henderson for (i = 0; i < 8; i++) { 228f888300bSRichard Henderson int s1, s2; 229f888300bSRichard Henderson 230f888300bSRichard Henderson s1 = (src1 >> (56 - (i * 8))) & 0xff; 231f888300bSRichard Henderson s2 = (src2 >> (56 - (i * 8))) & 0xff; 232f888300bSRichard Henderson 233f888300bSRichard Henderson /* Absolute value of difference. */ 234f888300bSRichard Henderson s1 -= s2; 235f888300bSRichard Henderson if (s1 < 0) { 236f888300bSRichard Henderson s1 = -s1; 237f888300bSRichard Henderson } 238f888300bSRichard Henderson 239f888300bSRichard Henderson sum += s1; 240f888300bSRichard Henderson } 241f888300bSRichard Henderson 242f888300bSRichard Henderson return sum; 243f888300bSRichard Henderson } 2442dedf314SRichard Henderson 2452dedf314SRichard Henderson uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2) 2462dedf314SRichard Henderson { 2472dedf314SRichard Henderson int scale = (gsr >> 3) & 0xf; 2482dedf314SRichard Henderson uint32_t ret = 0; 2492dedf314SRichard Henderson int byte; 2502dedf314SRichard Henderson 2512dedf314SRichard Henderson for (byte = 0; byte < 4; byte++) { 2522dedf314SRichard Henderson uint32_t val; 2532dedf314SRichard Henderson int16_t src = rs2 >> (byte * 16); 2542dedf314SRichard Henderson int32_t scaled = src << scale; 2552dedf314SRichard Henderson int32_t from_fixed = scaled >> 7; 2562dedf314SRichard Henderson 2572dedf314SRichard Henderson val = (from_fixed < 0 ? 0 : 2582dedf314SRichard Henderson from_fixed > 255 ? 255 : from_fixed); 2592dedf314SRichard Henderson 2602dedf314SRichard Henderson ret |= val << (8 * byte); 2612dedf314SRichard Henderson } 2622dedf314SRichard Henderson 2632dedf314SRichard Henderson return ret; 2642dedf314SRichard Henderson } 2652dedf314SRichard Henderson 2662dedf314SRichard Henderson uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2) 2672dedf314SRichard Henderson { 2682dedf314SRichard Henderson int scale = (gsr >> 3) & 0x1f; 2692dedf314SRichard Henderson uint64_t ret = 0; 2702dedf314SRichard Henderson int word; 2712dedf314SRichard Henderson 2722dedf314SRichard Henderson ret = (rs1 << 8) & ~(0x000000ff000000ffULL); 2732dedf314SRichard Henderson for (word = 0; word < 2; word++) { 2742dedf314SRichard Henderson uint64_t val; 2752dedf314SRichard Henderson int32_t src = rs2 >> (word * 32); 2762dedf314SRichard Henderson int64_t scaled = (int64_t)src << scale; 2772dedf314SRichard Henderson int64_t from_fixed = scaled >> 23; 2782dedf314SRichard Henderson 2792dedf314SRichard Henderson val = (from_fixed < 0 ? 0 : 2802dedf314SRichard Henderson (from_fixed > 255) ? 255 : from_fixed); 2812dedf314SRichard Henderson 2822dedf314SRichard Henderson ret |= val << (32 * word); 2832dedf314SRichard Henderson } 2842dedf314SRichard Henderson 2852dedf314SRichard Henderson return ret; 2862dedf314SRichard Henderson } 2872dedf314SRichard Henderson 2882dedf314SRichard Henderson uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2) 2892dedf314SRichard Henderson { 2902dedf314SRichard Henderson int scale = (gsr >> 3) & 0x1f; 2912dedf314SRichard Henderson uint32_t ret = 0; 2922dedf314SRichard Henderson int word; 2932dedf314SRichard Henderson 2942dedf314SRichard Henderson for (word = 0; word < 2; word++) { 2952dedf314SRichard Henderson uint32_t val; 2962dedf314SRichard Henderson int32_t src = rs2 >> (word * 32); 29712a3567cSPaolo Bonzini int64_t scaled = (int64_t)src << scale; 2982dedf314SRichard Henderson int64_t from_fixed = scaled >> 16; 2992dedf314SRichard Henderson 3002dedf314SRichard Henderson val = (from_fixed < -32768 ? -32768 : 3012dedf314SRichard Henderson from_fixed > 32767 ? 32767 : from_fixed); 3022dedf314SRichard Henderson 3032dedf314SRichard Henderson ret |= (val & 0xffff) << (word * 16); 3042dedf314SRichard Henderson } 3052dedf314SRichard Henderson 3062dedf314SRichard Henderson return ret; 3072dedf314SRichard Henderson } 308793a137aSRichard Henderson 309520c0d8dSAndreas Färber uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2) 310793a137aSRichard Henderson { 311793a137aSRichard Henderson union { 312793a137aSRichard Henderson uint64_t ll[2]; 313793a137aSRichard Henderson uint8_t b[16]; 314793a137aSRichard Henderson } s; 315793a137aSRichard Henderson VIS64 r; 316793a137aSRichard Henderson uint32_t i, mask, host; 317793a137aSRichard Henderson 318793a137aSRichard Henderson /* Set up S such that we can index across all of the bytes. */ 319e03b5686SMarc-André Lureau #if HOST_BIG_ENDIAN 320793a137aSRichard Henderson s.ll[0] = src1; 321793a137aSRichard Henderson s.ll[1] = src2; 322793a137aSRichard Henderson host = 0; 323793a137aSRichard Henderson #else 324793a137aSRichard Henderson s.ll[1] = src1; 325793a137aSRichard Henderson s.ll[0] = src2; 326793a137aSRichard Henderson host = 15; 327793a137aSRichard Henderson #endif 328793a137aSRichard Henderson mask = gsr >> 32; 329793a137aSRichard Henderson 330793a137aSRichard Henderson for (i = 0; i < 8; ++i) { 331793a137aSRichard Henderson unsigned e = (mask >> (28 - i*4)) & 0xf; 332793a137aSRichard Henderson r.VIS_B64(i) = s.b[e ^ host]; 333793a137aSRichard Henderson } 334793a137aSRichard Henderson 335793a137aSRichard Henderson return r.ll; 336793a137aSRichard Henderson } 337