/*
 * ARM AdvSIMD / SVE Vector Helpers
 *
 * Copyright (c) 2020 Linaro
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef TARGET_ARM_VEC_INTERNAL_H
#define TARGET_ARM_VEC_INTERNAL_H

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that needs a host-endian fixup.
 *
 * The H<N> macros are used when indexing an array of elements of size N.
 *
 * The H1_<N> macros are used when performing byte arithmetic and then
 * casting the final pointer to a type of size N.
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#endif
/*
 * Access to 64-bit elements isn't host-endian dependent; we provide H8
 * and H1_8 so that when a function is being generated from a macro we
 * can pass these rather than an empty macro argument, for clarity.
 */
#define H8(x)   (x)
#define H1_8(x) (x)

/*
 * Expand active predicate bits to bytes, for byte elements.
 *
 * This is a plain lookup of @byte in the 256-entry table
 * expand_pred_b_data, which is defined in another translation unit.
 */
extern const uint64_t expand_pred_b_data[256];
static inline uint64_t expand_pred_b(uint8_t byte)
{
    return expand_pred_b_data[byte];
}

/*
 * Zero the tail of a vector register: store 0 to every 64-bit word of
 * @vd from byte offset @opr_sz up to (but not including) @max_sz.
 * Nothing is written when @opr_sz >= @max_sz.
 *
 * NOTE(review): the loop advances in 8-byte steps, so this presumably
 * relies on both sizes being multiples of 8 -- confirm with callers.
 */
static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
{
    /* Byte-offset arithmetic on void * (GNU C extension). */
    uint64_t *d = vd + opr_sz;
    uintptr_t i;

    for (i = opr_sz; i < max_sz; i += 8) {
        *d++ = 0;
    }
}

/*
 * Signed shift of a @bits-wide element (8, 16 or 32) held in an int32_t.
 *
 * @shift < 0 is an arithmetic right shift by -@shift; with @round the
 * last bit shifted out is added back to the result.
 * @shift >= 0 is a left shift.  If @sat is non-NULL and the result does
 * not fit in @bits signed bits, *@sat is set to 1 and the result is
 * saturated; if @sat is NULL the result is simply truncated to @bits.
 *
 * NOTE(review): the left shift of a possibly negative @src relies on
 * two's-complement wrapping semantics, which ISO C does not guarantee;
 * presumably the build enforces this (e.g. -fwrapv) -- confirm.
 */
static inline int32_t do_sqrshl_bhs(int32_t src, int32_t shift, int bits,
                                    bool round, uint32_t *sat)
{
    if (shift <= -bits) {
        /* Rounding the sign bit always produces 0. */
        if (round) {
            return 0;
        }
        /* Everything but the sign is shifted out: result is 0 or -1. */
        return src >> 31;
    } else if (shift < 0) {
        if (round) {
            /* Stop one bit early so bit 0 is the rounding bit. */
            src >>= -shift - 1;
            return (src >> 1) + (src & 1);
        }
        return src >> -shift;
    } else if (shift < bits) {
        int32_t val = src << shift;
        if (bits == 32) {
            /* No overflow if shifting back recovers the input. */
            if (!sat || val >> shift == src) {
                return val;
            }
        } else {
            /* No overflow if the value survives sign-extraction. */
            int32_t extval = sextract32(val, 0, bits);
            if (!sat || val == extval) {
                return extval;
            }
        }
    } else if (!sat || src == 0) {
        /* Shift count >= element size: only zero does not overflow. */
        return 0;
    }

    /*
     * Saturate: 2^(bits-1) - 1 for non-negative input, else the
     * 2^(bits-1) bit pattern (the minimum value once the caller
     * truncates to @bits -- presumably it does; confirm).
     */
    *sat = 1;
    return (1u << (bits - 1)) - (src >= 0);
}

/*
 * Unsigned counterpart of do_sqrshl_bhs().
 *
 * @shift < 0 is a logical right shift by -@shift, optionally rounded;
 * @shift >= 0 is a left shift.  If @sat is non-NULL and the result does
 * not fit in @bits unsigned bits, *@sat is set to 1 and the all-ones
 * @bits-wide value is returned; if @sat is NULL the result is truncated.
 */
static inline uint32_t do_uqrshl_bhs(uint32_t src, int32_t shift, int bits,
                                     bool round, uint32_t *sat)
{
    /*
     * With rounding, a right shift by exactly @bits can still round
     * up to 1, so the "always zero" threshold is one step further out.
     */
    if (shift <= -(bits + round)) {
        return 0;
    } else if (shift < 0) {
        if (round) {
            /* Stop one bit early so bit 0 is the rounding bit. */
            src >>= -shift - 1;
            return (src >> 1) + (src & 1);
        }
        return src >> -shift;
    } else if (shift < bits) {
        uint32_t val = src << shift;
        if (bits == 32) {
            /* No overflow if shifting back recovers the input. */
            if (!sat || val >> shift == src) {
                return val;
            }
        } else {
            /* No overflow if nothing landed above bit @bits - 1. */
            uint32_t extval = extract32(val, 0, bits);
            if (!sat || val == extval) {
                return extval;
            }
        }
    } else if (!sat || src == 0) {
        /* Shift count >= element size: only zero does not overflow. */
        return 0;
    }

    *sat = 1;
    return MAKE_64BIT_MASK(0, bits);
}

/*
 * Signed input, unsigned saturating result.
 *
 * When saturation is requested (@sat non-NULL) a negative @src
 * saturates to 0 and sets *@sat; every other case is handled by
 * do_uqrshl_bhs() on the same bit pattern.
 */
static inline int32_t do_suqrshl_bhs(int32_t src, int32_t shift, int bits,
                                     bool round, uint32_t *sat)
{
    if (sat && src < 0) {
        *sat = 1;
        return 0;
    }
    return do_uqrshl_bhs(src, shift, bits, round, sat);
}

/*
 * 64-bit variant of do_sqrshl_bhs(): signed shift of @src by @shift
 * (negative = right shift, optionally rounded), saturating to
 * INT64_MIN / INT64_MAX and setting *@sat when @sat is non-NULL and
 * the left shift overflows.
 */
static inline int64_t do_sqrshl_d(int64_t src, int64_t shift,
                                  bool round, uint32_t *sat)
{
    if (shift <= -64) {
        /* Rounding the sign bit always produces 0. */
        if (round) {
            return 0;
        }
        /* Everything but the sign is shifted out: result is 0 or -1. */
        return src >> 63;
    } else if (shift < 0) {
        if (round) {
            /* Stop one bit early so bit 0 is the rounding bit. */
            src >>= -shift - 1;
            return (src >> 1) + (src & 1);
        }
        return src >> -shift;
    } else if (shift < 64) {
        int64_t val = src << shift;
        /* No overflow if shifting back recovers the input. */
        if (!sat || val >> shift == src) {
            return val;
        }
    } else if (!sat || src == 0) {
        /* Shift count >= 64: only zero does not overflow. */
        return 0;
    }

    *sat = 1;
    return src < 0 ? INT64_MIN : INT64_MAX;
}

/*
 * 64-bit variant of do_uqrshl_bhs(): unsigned shift of @src by @shift
 * (negative = right shift, optionally rounded), saturating to
 * UINT64_MAX and setting *@sat when @sat is non-NULL and the left
 * shift overflows.
 */
static inline uint64_t do_uqrshl_d(uint64_t src, int64_t shift,
                                   bool round, uint32_t *sat)
{
    /* With rounding, a right shift by exactly 64 can still yield 1. */
    if (shift <= -(64 + round)) {
        return 0;
    } else if (shift < 0) {
        if (round) {
            /* Stop one bit early so bit 0 is the rounding bit. */
            src >>= -shift - 1;
            return (src >> 1) + (src & 1);
        }
        return src >> -shift;
    } else if (shift < 64) {
        uint64_t val = src << shift;
        /* No overflow if shifting back recovers the input. */
        if (!sat || val >> shift == src) {
            return val;
        }
    } else if (!sat || src == 0) {
        /* Shift count >= 64: only zero does not overflow. */
        return 0;
    }

    *sat = 1;
    return UINT64_MAX;
}

/*
 * 64-bit variant of do_suqrshl_bhs(): when @sat is non-NULL a negative
 * @src saturates to 0 and sets *@sat; otherwise defer to do_uqrshl_d().
 */
static inline int64_t do_suqrshl_d(int64_t src, int64_t shift,
                                   bool round, uint32_t *sat)
{
    if (sat && src < 0) {
        *sat = 1;
        return 0;
    }
    return do_uqrshl_d(src, shift, round, sat);
}

/*
 * Out-of-line helpers; only the prototypes are visible in this header.
 * NOTE(review): parameter meanings are not visible here -- see the
 * definitions before relying on argument order.
 */
int8_t do_sqrdmlah_b(int8_t, int8_t, int8_t, bool, bool);
int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *);
int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *);
int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);

/*
 * 8 x 8 -> 16 vector polynomial multiply where the inputs are
 * in the low 8 bits of each 16-bit element
 */
uint64_t pmull_h(uint64_t op1, uint64_t op2);
/*
 * 16 x 16 -> 32 vector polynomial multiply where the inputs are
 * in the low 16 bits of each 32-bit element
 */
uint64_t pmull_w(uint64_t op1, uint64_t op2);

#endif /* TARGET_ARM_VEC_INTERNAL_H */