xref: /qemu/target/arm/tcg/vec_internal.h (revision 77f96148f3f6c4106a2a3cee8146690f954fd6cd)
1 /*
2  * ARM AdvSIMD / SVE Vector Helpers
3  *
4  * Copyright (c) 2020 Linaro
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifndef TARGET_ARM_VEC_INTERNALS_H
21 #define TARGET_ARM_VEC_INTERNALS_H
22 
23 /*
24  * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
26  *
27  * The H<N> macros are used when indexing an array of elements of size N.
28  *
29  * The H1_<N> macros are used when performing byte arithmetic and then
30  * casting the final pointer to a type of size N.
31  */
#if defined(HOST_WORDS_BIGENDIAN)
/*
 * Big-endian host: XOR the index (or byte offset) with a constant so that
 * it is flipped to the mirrored position inside its 8-byte chunk.
 */
#define H1(x)   ((x) ^ 7)   /* index into an array of 1-byte elements */
#define H2(x)   ((x) ^ 3)   /* index into an array of 2-byte elements */
#define H4(x)   ((x) ^ 1)   /* index into an array of 4-byte elements */
#define H1_2(x) ((x) ^ 6)   /* byte offset, result cast to a 2-byte type */
#define H1_4(x) ((x) ^ 4)   /* byte offset, result cast to a 4-byte type */
#else
/* Little-endian host: every index and byte offset is used unchanged. */
#define H1(x)   (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#endif

/*
 * 64-bit elements never need a fixup, whatever the host byte order.
 * H8 and H1_8 exist purely so that macro-generated functions can be
 * handed a real macro name instead of an empty argument.
 */
#define H8(x)   (x)
#define H1_8(x) (x)
52 
/*
 * Data for expanding active predicate bits to bytes, for byte elements.
 *
 * This is only a declaration of a read-only table of 256 64-bit entries;
 * the definition lives in another translation unit.
 * NOTE(review): presumably indexed by 8 predicate bits, each entry holding
 * the matching 8-byte mask -- confirm against the definition.
 */
extern const uint64_t expand_pred_b_data[256];
55 
/*
 * Zero the part of a vector that lies beyond the bytes just operated on.
 *
 * @vd:     start of the vector storage
 * @opr_sz: byte offset at which zeroing starts
 * @max_sz: byte offset at which zeroing stops
 *
 * The region is cleared with 64-bit stores, one per 8 bytes of
 * [opr_sz, max_sz).  Nothing is written when opr_sz >= max_sz.
 */
static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
{
    uint64_t *tail = (uint64_t *)((char *)vd + opr_sz);
    uintptr_t off = opr_sz;

    while (off < max_sz) {
        *tail++ = 0;
        off += 8;
    }
}
65 
/*
 * Signed shift of an element of width @bits (at most 32), with optional
 * rounding and optional saturation.
 *
 * @src:   value to shift, held in an int32_t
 *         (NOTE(review): presumably already sign-extended from @bits bits
 *         by the caller -- confirm)
 * @shift: signed shift count; positive shifts left, negative shifts right
 * @bits:  element width in bits
 * @round: when shifting right, round to nearest instead of truncating
 * @sat:   if non-NULL, saturation is enabled and *sat is set to 1 whenever
 *         the result had to be saturated; if NULL, the left-shifted value
 *         is simply truncated to @bits bits
 *
 * On saturation the return value is 2^(bits-1) - 1 for non-negative @src
 * and 2^(bits-1) otherwise (not sign-extended when @bits < 32).
 */
static inline int32_t do_sqrshl_bhs(int32_t src, int32_t shift, int bits,
                                    bool round, uint32_t *sat)
{
    if (shift <= -bits) {
        /*
         * Everything but the sign is shifted out.  Rounding the sign bit
         * always produces 0; without rounding the result is 0 or -1.
         */
        return round ? 0 : src >> 31;
    }

    if (shift < 0) {
        int32_t partial;

        if (!round) {
            return src >> -shift;
        }
        /* Stop one bit short, then add the last bit shifted out. */
        partial = src >> (-shift - 1);
        return (partial >> 1) + (partial & 1);
    }

    if (shift >= bits) {
        /* Every value bit is shifted out: only 0 survives unsaturated. */
        if (!sat || src == 0) {
            return 0;
        }
    } else {
        int32_t shifted = src << shift;
        int32_t result;
        bool fits;

        if (bits == 32) {
            /* Shifting back must recover the input if nothing was lost. */
            result = shifted;
            fits = (shifted >> shift) == src;
        } else {
            /* The value fits iff truncating to @bits bits is a no-op. */
            result = sextract32(shifted, 0, bits);
            fits = shifted == result;
        }
        if (!sat || fits) {
            return result;
        }
    }

    /* Overflow with saturation enabled. */
    *sat = 1;
    return (1u << (bits - 1)) - (src >= 0);
}
100 
/*
 * Unsigned shift of an element of width @bits (at most 32), with optional
 * rounding and optional saturation.
 *
 * @src:   value to shift, held in a uint32_t
 *         (NOTE(review): presumably zero-extended from @bits bits by the
 *         caller -- confirm)
 * @shift: signed shift count; positive shifts left, negative shifts right
 * @bits:  element width in bits
 * @round: when shifting right, round to nearest instead of truncating
 * @sat:   if non-NULL, saturation is enabled and *sat is set to 1 whenever
 *         the result had to be saturated; if NULL, the left-shifted value
 *         is simply truncated to @bits bits
 *
 * On saturation the return value has the low @bits bits all set.
 */
static inline uint32_t do_uqrshl_bhs(uint32_t src, int32_t shift, int bits,
                                     bool round, uint32_t *sat)
{
    /*
     * With rounding, a right shift by exactly @bits can still yield 1
     * (from the top bit), so the "always zero" range starts one later.
     */
    if (shift <= -(bits + round)) {
        return 0;
    }

    if (shift < 0) {
        uint32_t partial;

        if (!round) {
            return src >> -shift;
        }
        /* Stop one bit short, then add the last bit shifted out. */
        partial = src >> (-shift - 1);
        return (partial >> 1) + (partial & 1);
    }

    if (shift >= bits) {
        /* Every value bit is shifted out: only 0 survives unsaturated. */
        if (!sat || src == 0) {
            return 0;
        }
    } else {
        uint32_t shifted = src << shift;
        uint32_t result;
        bool fits;

        if (bits == 32) {
            /* Shifting back must recover the input if nothing was lost. */
            result = shifted;
            fits = (shifted >> shift) == src;
        } else {
            /* The value fits iff truncating to @bits bits is a no-op. */
            result = extract32(shifted, 0, bits);
            fits = shifted == result;
        }
        if (!sat || fits) {
            return result;
        }
    }

    /* Overflow with saturation enabled. */
    *sat = 1;
    return MAKE_64BIT_MASK(0, bits);
}
131 
/*
 * Shift a signed input but produce an unsigned (saturated) result, for an
 * element of width @bits.
 *
 * When saturation is enabled (@sat non-NULL) a negative @src saturates to
 * 0 and sets *sat to 1.  In every other case the work is delegated to
 * do_uqrshl_bhs() with the same arguments.
 */
static inline int32_t do_suqrshl_bhs(int32_t src, int32_t shift, int bits,
                                     bool round, uint32_t *sat)
{
    if (!sat || src >= 0) {
        return do_uqrshl_bhs(src, shift, bits, round, sat);
    }

    /* Negative input cannot be represented as an unsigned result. */
    *sat = 1;
    return 0;
}
141 
/*
 * Signed shift of a 64-bit element, with optional rounding and optional
 * saturation.
 *
 * @src:   value to shift
 * @shift: signed shift count; positive shifts left, negative shifts right
 * @round: when shifting right, round to nearest instead of truncating
 * @sat:   if non-NULL, saturation is enabled and *sat is set to 1 whenever
 *         the result had to be saturated; if NULL, overflowing bits of a
 *         left shift are simply discarded
 *
 * On saturation the return value is INT64_MIN for negative @src and
 * INT64_MAX otherwise.
 */
static inline int64_t do_sqrshl_d(int64_t src, int64_t shift,
                                  bool round, uint32_t *sat)
{
    if (shift <= -64) {
        /*
         * Everything but the sign is shifted out.  Rounding the sign bit
         * always produces 0; without rounding the result is 0 or -1.
         */
        return round ? 0 : src >> 63;
    }

    if (shift < 0) {
        int64_t partial;

        if (!round) {
            return src >> -shift;
        }
        /* Stop one bit short, then add the last bit shifted out. */
        partial = src >> (-shift - 1);
        return (partial >> 1) + (partial & 1);
    }

    if (shift >= 64) {
        /* Every value bit is shifted out: only 0 survives unsaturated. */
        if (!sat || src == 0) {
            return 0;
        }
    } else {
        int64_t shifted = src << shift;

        /* Shifting back must recover the input if nothing was lost. */
        if (!sat || (shifted >> shift) == src) {
            return shifted;
        }
    }

    /* Overflow with saturation enabled. */
    *sat = 1;
    return src < 0 ? INT64_MIN : INT64_MAX;
}
169 
/*
 * Unsigned shift of a 64-bit element, with optional rounding and optional
 * saturation.
 *
 * @src:   value to shift
 * @shift: signed shift count; positive shifts left, negative shifts right
 * @round: when shifting right, round to nearest instead of truncating
 * @sat:   if non-NULL, saturation is enabled and *sat is set to 1 whenever
 *         the result had to be saturated; if NULL, overflowing bits of a
 *         left shift are simply discarded
 *
 * On saturation the return value is UINT64_MAX.
 */
static inline uint64_t do_uqrshl_d(uint64_t src, int64_t shift,
                                   bool round, uint32_t *sat)
{
    /*
     * With rounding, a right shift by exactly 64 can still yield 1 (from
     * the top bit), so the "always zero" range starts one later.
     */
    if (shift <= -(64 + round)) {
        return 0;
    }

    if (shift < 0) {
        uint64_t partial;

        if (!round) {
            return src >> -shift;
        }
        /* Stop one bit short, then add the last bit shifted out. */
        partial = src >> (-shift - 1);
        return (partial >> 1) + (partial & 1);
    }

    if (shift >= 64) {
        /* Every value bit is shifted out: only 0 survives unsaturated. */
        if (!sat || src == 0) {
            return 0;
        }
    } else {
        uint64_t shifted = src << shift;

        /* Shifting back must recover the input if nothing was lost. */
        if (!sat || (shifted >> shift) == src) {
            return shifted;
        }
    }

    /* Overflow with saturation enabled. */
    *sat = 1;
    return UINT64_MAX;
}
193 
/*
 * Shift a signed 64-bit input but produce an unsigned (saturated) result.
 *
 * When saturation is enabled (@sat non-NULL) a negative @src saturates to
 * 0 and sets *sat to 1.  In every other case the work is delegated to
 * do_uqrshl_d() with the same arguments.
 */
static inline int64_t do_suqrshl_d(int64_t src, int64_t shift,
                                   bool round, uint32_t *sat)
{
    if (!sat || src >= 0) {
        return do_uqrshl_d(src, shift, round, sat);
    }

    /* Negative input cannot be represented as an unsigned result. */
    *sat = 1;
    return 0;
}
203 
/*
 * Out-of-line helpers declared here and defined elsewhere, one per element
 * width: the _b/_h/_s/_d variants take and return int8_t/int16_t/int32_t/
 * int64_t respectively.  Each takes three integer operands and two bool
 * flags; the _h and _s variants take an additional uint32_t pointer.
 *
 * NOTE(review): the parameters are unnamed in these prototypes.  The name
 * suggests a signed saturating rounding doubling multiply-accumulate
 * (high half), and the uint32_t pointer is presumably a saturation flag
 * like the sat argument of the inline shift helpers in this header --
 * confirm the operand order and flag meanings against the definitions.
 */
int8_t do_sqrdmlah_b(int8_t, int8_t, int8_t, bool, bool);
int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *);
int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *);
int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);
208 
209 #endif /* TARGET_ARM_VEC_INTERNALS_H */
210