/*
 * ARM AdvSIMD / SVE Vector Helpers
 *
 * Copyright (c) 2020 Linaro
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef TARGET_ARM_VEC_INTERNALS_H
#define TARGET_ARM_VEC_INTERNALS_H

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that needs a host-endian fixup.
 *
 * The H<N> macros are used when indexing an array of elements of size N.
 *
 * The H1_<N> macros are used when performing byte arithmetic and then
 * casting the final pointer to a type of size N.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#endif
/*
 * Access to 64-bit elements isn't host-endian dependent; we provide H8
 * and H1_8 so that when a function is being generated from a macro we
 * can pass these rather than an empty macro argument, for clarity.
 */
#define H8(x)   (x)
#define H1_8(x) (x)

/* Data for expanding active predicate bits to bytes, for byte elements. */
extern const uint64_t expand_pred_b_data[256];

/*
 * Zero the bytes of the buffer at @vd from byte offset @opr_sz up to
 * @max_sz.  The store is done in whole 64-bit words, 8 bytes per step,
 * so a partial final word is written in full.  Nothing is written when
 * @opr_sz >= @max_sz.
 *
 * Note that "vd + opr_sz" is arithmetic on a void pointer, which is a
 * GNU C extension (the offset is counted in bytes).
 * NOTE(review): presumably both sizes are multiples of 8 and @vd is
 * 8-byte aligned -- confirm against callers.
 */
static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
{
    uint64_t *d = vd + opr_sz;
    uintptr_t i;

    for (i = opr_sz; i < max_sz; i += 8) {
        *d++ = 0;
    }
}

/*
 * Shift the signed value @src, treated as a @bits-wide element, by the
 * signed amount @shift: positive shifts left, negative shifts right.
 *
 * @round: for right shifts, add back the last bit shifted out.
 * @sat:   if non-NULL, a left shift whose result does not fit in @bits
 *         signed bits stores 1 to *@sat and returns the saturation value
 *         below.  If NULL, the result is simply truncated to @bits bits.
 *         *@sat is only ever set here, never cleared.
 *
 * Saturation value: (1u << (@bits - 1)) - 1 for non-negative @src,
 * 1u << (@bits - 1) for negative @src.
 * NOTE(review): for @bits < 32 the negative case is the bit pattern of
 * the minimum rather than a sign-extended value; presumably callers
 * truncate the result to the element type -- confirm.
 *
 * The signed left shift and the arithmetic right shifts go beyond what
 * ISO C guarantees for negative operands.
 * NOTE(review): presumably the build defines this behaviour (e.g. via
 * -fwrapv) -- confirm.
 */
static inline int32_t do_sqrshl_bhs(int32_t src, int32_t shift, int bits,
                                    bool round, uint32_t *sat)
{
    if (shift <= -bits) {
        /* Rounding the sign bit always produces 0. */
        if (round) {
            return 0;
        }
        /* Every value bit is shifted out; only the sign fill remains. */
        return src >> 31;
    } else if (shift < 0) {
        if (round) {
            /* Stop one bit short, then add that last dropped bit back. */
            src >>= -shift - 1;
            return (src >> 1) + (src & 1);
        }
        return src >> -shift;
    } else if (shift < bits) {
        int32_t val = src << shift;
        if (bits == 32) {
            /* No overflow if shifting back recovers the input. */
            if (!sat || val >> shift == src) {
                return val;
            }
        } else {
            /* No overflow if the value survives sign-extension from @bits. */
            int32_t extval = sextract32(val, 0, bits);
            if (!sat || val == extval) {
                return extval;
            }
        }
    } else if (!sat || src == 0) {
        /* Shifted entirely out of the element. */
        return 0;
    }

    *sat = 1;
    return (1u << (bits - 1)) - (src >= 0);
}

/*
 * Unsigned counterpart of do_sqrshl_bhs(): shift the unsigned value @src,
 * treated as a @bits-wide element, by the signed amount @shift.
 *
 * @round: for right shifts, add back the last bit shifted out.  With
 *         rounding, a right shift by exactly @bits is still computed,
 *         since the top bit can round up to 1; anything further is 0.
 * @sat:   if non-NULL, a left shift that loses set bits stores 1 to *@sat
 *         and returns MAKE_64BIT_MASK(0, @bits).  If NULL, the result is
 *         truncated to @bits bits.  *@sat is only ever set here.
 *
 * NOTE(review): MAKE_64BIT_MASK and extract32 are defined outside this
 * header; presumably the mask is the low @bits bits set, i.e. the
 * unsigned maximum -- confirm.
 */
static inline uint32_t do_uqrshl_bhs(uint32_t src, int32_t shift, int bits,
                                     bool round, uint32_t *sat)
{
    if (shift <= -(bits + round)) {
        return 0;
    } else if (shift < 0) {
        if (round) {
            /* Stop one bit short, then add that last dropped bit back. */
            src >>= -shift - 1;
            return (src >> 1) + (src & 1);
        }
        return src >> -shift;
    } else if (shift < bits) {
        uint32_t val = src << shift;
        if (bits == 32) {
            /* No overflow if shifting back recovers the input. */
            if (!sat || val >> shift == src) {
                return val;
            }
        } else {
            /* No overflow if nothing was shifted above bit @bits - 1. */
            uint32_t extval = extract32(val, 0, bits);
            if (!sat || val == extval) {
                return extval;
            }
        }
    } else if (!sat || src == 0) {
        /* Shifted entirely out of the element. */
        return 0;
    }

    *sat = 1;
    return MAKE_64BIT_MASK(0, bits);
}

/*
 * Signed input, unsigned result.  When @sat is non-NULL a negative @src
 * stores 1 to *@sat and returns 0.  In every other case @src is converted
 * to uint32_t and handled by do_uqrshl_bhs() with the same arguments.
 */
static inline int32_t do_suqrshl_bhs(int32_t src, int32_t shift, int bits,
                                     bool round, uint32_t *sat)
{
    if (sat && src < 0) {
        *sat = 1;
        return 0;
    }
    return do_uqrshl_bhs(src, shift, bits, round, sat);
}

/*
 * 64-bit signed shift of @src by the signed amount @shift: positive
 * shifts left, negative shifts right.
 *
 * @round: for right shifts, add back the last bit shifted out.
 * @sat:   if non-NULL, a left shift that overflows stores 1 to *@sat and
 *         returns INT64_MIN for negative @src, INT64_MAX otherwise.  If
 *         NULL, the wrapped result is returned.  *@sat is only ever set.
 *
 * The signed left shift and the arithmetic right shifts go beyond what
 * ISO C guarantees for negative operands.
 * NOTE(review): presumably the build defines this behaviour (e.g. via
 * -fwrapv) -- confirm.
 */
static inline int64_t do_sqrshl_d(int64_t src, int64_t shift,
                                  bool round, uint32_t *sat)
{
    if (shift <= -64) {
        /* Rounding the sign bit always produces 0. */
        if (round) {
            return 0;
        }
        /* Every value bit is shifted out; only the sign fill remains. */
        return src >> 63;
    } else if (shift < 0) {
        if (round) {
            /* Stop one bit short, then add that last dropped bit back. */
            src >>= -shift - 1;
            return (src >> 1) + (src & 1);
        }
        return src >> -shift;
    } else if (shift < 64) {
        int64_t val = src << shift;
        /* No overflow if shifting back recovers the input. */
        if (!sat || val >> shift == src) {
            return val;
        }
    } else if (!sat || src == 0) {
        /* Shifted entirely out of the element. */
        return 0;
    }

    *sat = 1;
    return src < 0 ? INT64_MIN : INT64_MAX;
}

/*
 * 64-bit unsigned shift of @src by the signed amount @shift.
 *
 * @round: for right shifts, add back the last bit shifted out.  With
 *         rounding, a right shift by exactly 64 is still computed, since
 *         the top bit can round up to 1; anything further is 0.
 * @sat:   if non-NULL, a left shift that loses set bits stores 1 to *@sat
 *         and returns UINT64_MAX.  If NULL, the wrapped result is
 *         returned.  *@sat is only ever set here.
 */
static inline uint64_t do_uqrshl_d(uint64_t src, int64_t shift,
                                   bool round, uint32_t *sat)
{
    if (shift <= -(64 + round)) {
        return 0;
    } else if (shift < 0) {
        if (round) {
            /* Stop one bit short, then add that last dropped bit back. */
            src >>= -shift - 1;
            return (src >> 1) + (src & 1);
        }
        return src >> -shift;
    } else if (shift < 64) {
        uint64_t val = src << shift;
        /* No overflow if shifting back recovers the input. */
        if (!sat || val >> shift == src) {
            return val;
        }
    } else if (!sat || src == 0) {
        /* Shifted entirely out of the element. */
        return 0;
    }

    *sat = 1;
    return UINT64_MAX;
}

/*
 * Signed input, unsigned result.  When @sat is non-NULL a negative @src
 * stores 1 to *@sat and returns 0.  In every other case @src is converted
 * to uint64_t and handled by do_uqrshl_d() with the same arguments.
 */
static inline int64_t do_suqrshl_d(int64_t src, int64_t shift,
                                   bool round, uint32_t *sat)
{
    if (sat && src < 0) {
        *sat = 1;
        return 0;
    }
    return do_uqrshl_d(src, shift, round, sat);
}

/*
 * Declared here, implemented out of line; the definitions are not
 * visible in this header.
 */
int8_t do_sqrdmlah_b(int8_t, int8_t, int8_t, bool, bool);
int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *);
int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *);
int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);

#endif /* TARGET_ARM_VEC_INTERNALS_H */