/*
 * ARM AdvSIMD / SVE Vector Helpers
 *
 * Copyright (c) 2020 Linaro
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef TARGET_ARM_VEC_INTERNALS_H
#define TARGET_ARM_VEC_INTERNALS_H

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 *
 * The H<N> macros are used when indexing an array of elements of size N.
 *
 * The H1_<N> macros are used when performing byte arithmetic and then
 * casting the final pointer to a type of size N.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#endif


/*
 * Zero the storage at @vd from byte offset @opr_sz up to @max_sz.
 *
 * The tail is cleared with whole 64-bit stores, advancing 8 bytes per
 * iteration, so nothing is written when @opr_sz >= @max_sz.
 * NOTE(review): assumes @opr_sz and @max_sz are multiples of 8 and that
 * @vd is suitably aligned for uint64_t -- confirm against callers.
 */
static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
{
    /* Do the byte offset on a char pointer; void * arithmetic is a GNU-ism. */
    uint64_t *d = (uint64_t *)((char *)vd + opr_sz);
    uintptr_t i;

    for (i = opr_sz; i < max_sz; i += 8) {
        *d++ = 0;
    }
}

/*
 * Signed shift of a @bits-wide element (8, 16 or 32 bits are the
 * presumable uses; the value is carried in an int32_t).
 *
 * @src:   the element value.
 * @shift: signed shift count; negative shifts right, non-negative
 *         shifts left.
 * @bits:  element width in bits.
 * @round: when shifting right, round to nearest by adding in the last
 *         bit shifted out.
 * @sat:   if non-NULL, left shifts saturate and *@sat is set to 1 when
 *         saturation happens; if NULL, left shifts simply truncate to
 *         @bits bits.
 *
 * Returns the shifted value.  On saturation the result is
 * (1u << (@bits - 1)) - (@src >= 0), i.e. the maximum for non-negative
 * @src and the bit pattern of the minimum for negative @src (not
 * sign-extended beyond @bits when @bits < 32).
 */
static inline int32_t do_sqrshl_bhs(int32_t src, int32_t shift, int bits,
                                    bool round, uint32_t *sat)
{
    if (shift <= -bits) {
        /* Rounding the sign bit always produces 0. */
        if (round) {
            return 0;
        }
        /* Every value bit is shifted out; only the sign remains. */
        return src >> 31;
    } else if (shift < 0) {
        if (round) {
            /* Keep one extra bit, then add it back in as the rounding. */
            src >>= -shift - 1;
            return (src >> 1) + (src & 1);
        }
        return src >> -shift;
    } else if (shift < bits) {
        /*
         * Shift as unsigned: left-shifting a negative value, or one
         * whose result overflows, is undefined for signed types in
         * ISO C.  The conversion back yields the wrapped value on the
         * two's complement hosts this code runs on.
         */
        int32_t val = (int32_t)((uint32_t)src << shift);
        if (bits == 32) {
            /* No overflow iff shifting back recovers the input. */
            if (!sat || val >> shift == src) {
                return val;
            }
        } else {
            /* No overflow iff the value survives narrowing to @bits. */
            int32_t extval = sextract32(val, 0, bits);
            if (!sat || val == extval) {
                return extval;
            }
        }
    } else if (!sat || src == 0) {
        /* Shifted entirely out of the element. */
        return 0;
    }

    *sat = 1;
    return (1u << (bits - 1)) - (src >= 0);
}

/*
 * Unsigned shift of a @bits-wide element carried in a uint32_t.
 *
 * Parameters are as for do_sqrshl_bhs().  On saturation the result is
 * MAKE_64BIT_MASK(0, @bits) (presumably all-ones in the low @bits bits)
 * and *@sat is set to 1.
 */
static inline uint32_t do_uqrshl_bhs(uint32_t src, int32_t shift, int bits,
                                     bool round, uint32_t *sat)
{
    /*
     * Without rounding a right shift by @bits or more yields 0.  With
     * rounding, a shift by exactly @bits can still round up from the
     * top bit, so only larger shifts are short-circuited here.
     */
    if (shift <= -(bits + round)) {
        return 0;
    } else if (shift < 0) {
        if (round) {
            /* Keep one extra bit, then add it back in as the rounding. */
            src >>= -shift - 1;
            return (src >> 1) + (src & 1);
        }
        return src >> -shift;
    } else if (shift < bits) {
        uint32_t val = src << shift;
        if (bits == 32) {
            /* No overflow iff shifting back recovers the input. */
            if (!sat || val >> shift == src) {
                return val;
            }
        } else {
            /* No overflow iff the value survives narrowing to @bits. */
            uint32_t extval = extract32(val, 0, bits);
            if (!sat || val == extval) {
                return extval;
            }
        }
    } else if (!sat || src == 0) {
        /* Shifted entirely out of the element. */
        return 0;
    }

    *sat = 1;
    return MAKE_64BIT_MASK(0, bits);
}

/*
 * Shift a signed input, producing an unsigned-saturated result.
 *
 * When @sat is non-NULL a negative @src saturates to 0 and sets *@sat
 * to 1; otherwise @src is reinterpreted as unsigned and handled by
 * do_uqrshl_bhs().
 */
static inline int32_t do_suqrshl_bhs(int32_t src, int32_t shift, int bits,
                                     bool round, uint32_t *sat)
{
    if (sat && src < 0) {
        *sat = 1;
        return 0;
    }
    return do_uqrshl_bhs(src, shift, bits, round, sat);
}

/*
 * Signed shift of a 64-bit element.
 *
 * @src:   the element value.
 * @shift: signed shift count; negative shifts right, non-negative
 *         shifts left.
 * @round: when shifting right, round to nearest by adding in the last
 *         bit shifted out.
 * @sat:   if non-NULL, left shifts saturate to INT64_MIN / INT64_MAX
 *         and *@sat is set to 1 when that happens; if NULL, left
 *         shifts wrap.
 */
static inline int64_t do_sqrshl_d(int64_t src, int64_t shift,
                                  bool round, uint32_t *sat)
{
    if (shift <= -64) {
        /* Rounding the sign bit always produces 0. */
        if (round) {
            return 0;
        }
        /* Every value bit is shifted out; only the sign remains. */
        return src >> 63;
    } else if (shift < 0) {
        if (round) {
            /* Keep one extra bit, then add it back in as the rounding. */
            src >>= -shift - 1;
            return (src >> 1) + (src & 1);
        }
        return src >> -shift;
    } else if (shift < 64) {
        /*
         * Shift as unsigned: left-shifting a negative value, or one
         * whose result overflows, is undefined for signed types in
         * ISO C.  The conversion back yields the wrapped value on the
         * two's complement hosts this code runs on.
         */
        int64_t val = (int64_t)((uint64_t)src << shift);
        /* No overflow iff shifting back recovers the input. */
        if (!sat || val >> shift == src) {
            return val;
        }
    } else if (!sat || src == 0) {
        /* Shifted entirely out of the element. */
        return 0;
    }

    *sat = 1;
    return src < 0 ? INT64_MIN : INT64_MAX;
}

/*
 * Unsigned shift of a 64-bit element.
 *
 * Parameters are as for do_sqrshl_d().  On saturation the result is
 * UINT64_MAX and *@sat is set to 1.
 */
static inline uint64_t do_uqrshl_d(uint64_t src, int64_t shift,
                                   bool round, uint32_t *sat)
{
    /*
     * Without rounding a right shift by 64 or more yields 0.  With
     * rounding, a shift by exactly 64 can still round up from the top
     * bit, so only larger shifts are short-circuited here.
     */
    if (shift <= -(64 + round)) {
        return 0;
    } else if (shift < 0) {
        if (round) {
            /* Keep one extra bit, then add it back in as the rounding. */
            src >>= -shift - 1;
            return (src >> 1) + (src & 1);
        }
        return src >> -shift;
    } else if (shift < 64) {
        uint64_t val = src << shift;
        /* No overflow iff shifting back recovers the input. */
        if (!sat || val >> shift == src) {
            return val;
        }
    } else if (!sat || src == 0) {
        /* Shifted entirely out of the element. */
        return 0;
    }

    *sat = 1;
    return UINT64_MAX;
}

/*
 * Shift a signed 64-bit input, producing an unsigned-saturated result.
 *
 * When @sat is non-NULL a negative @src saturates to 0 and sets *@sat
 * to 1; otherwise @src is reinterpreted as unsigned and handled by
 * do_uqrshl_d().
 */
static inline int64_t do_suqrshl_d(int64_t src, int64_t shift,
                                   bool round, uint32_t *sat)
{
    if (sat && src < 0) {
        *sat = 1;
        return 0;
    }
    return do_uqrshl_d(src, shift, round, sat);
}

/* Out-of-line helpers; only the declarations are provided here. */
int8_t do_sqrdmlah_b(int8_t, int8_t, int8_t, bool, bool);
int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *);
int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *);
int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);

#endif /* TARGET_ARM_VEC_INTERNALS_H */