xref: /qemu/target/arm/tcg/vec_internal.h (revision 93966af1d38213b26ef6efc4719851cbc18ec64f)
1 /*
2  * ARM AdvSIMD / SVE Vector Helpers
3  *
4  * Copyright (c) 2020 Linaro
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifndef TARGET_ARM_VEC_INTERNALS_H
21 #define TARGET_ARM_VEC_INTERNALS_H
22 
/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 *
 * The H<N> macros are used when indexing an array of elements of size N.
 *
 * The H1_<N> macros are used when performing byte arithmetic and then
 * casting the final pointer to a type of size N.
 *
 * On little-endian hosts every macro is the identity.  On big-endian
 * hosts each macro XORs its argument with a constant so that the index
 * (or byte offset) selects the mirrored position inside its 8-byte chunk.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)   /* byte index: 8 bytes per chunk */
#define H1_2(x) ((x) ^ 6)   /* byte offset of a 2-byte element */
#define H1_4(x) ((x) ^ 4)   /* byte offset of a 4-byte element */
#define H2(x)   ((x) ^ 3)   /* 2-byte element index: 4 per chunk */
#define H4(x)   ((x) ^ 1)   /* 4-byte element index: 2 per chunk */
#else
/* Little-endian host: element order already matches, no fixup needed. */
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#endif
45 
46 
/*
 * Zero the tail of a vector buffer.
 *
 * @vd:     start of the buffer
 * @opr_sz: byte offset at which clearing starts
 * @max_sz: byte offset at which clearing stops
 *
 * Stores 64-bit zeros starting at @vd + @opr_sz, one per 8 bytes, while
 * the running offset is below @max_sz.  Nothing is written when
 * @opr_sz >= @max_sz.  If the distance is not a multiple of 8 the last
 * store extends past @max_sz, so callers are expected to pass sizes that
 * are multiples of 8 (and a suitably aligned @vd + @opr_sz).
 */
static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
{
    /*
     * Do the byte arithmetic on a char pointer: arithmetic on void *
     * is a GNU extension, not ISO C.
     */
    uint64_t *d = (uint64_t *)((char *)vd + opr_sz);
    uintptr_t i;

    for (i = opr_sz; i < max_sz; i += 8) {
        *d++ = 0;
    }
}
56 
/*
 * Signed shift of an element of width @bits (up to 32) by a signed
 * amount, with optional rounding and optional saturation.
 *
 * @src:   value to shift
 * @shift: shift count; positive shifts left, negative shifts right
 * @bits:  element width in bits; 32 is handled directly, any other
 *         width is checked by sign-extracting the low @bits bits
 * @round: for right shifts, add the last bit shifted out to the result
 * @sat:   if non-NULL, a left shift that does not fit in @bits bits sets
 *         *@sat to 1 and returns a saturated value; if NULL, the result
 *         is simply truncated to @bits bits
 *
 * On saturation the return value is 2^(@bits - 1) - 1 for @src >= 0 and
 * the bit pattern 2^(@bits - 1) for @src < 0; the latter equals the most
 * negative value once the caller truncates it to @bits bits.
 */
static inline int32_t do_sqrshl_bhs(int32_t src, int32_t shift, int bits,
                                    bool round, uint32_t *sat)
{
    if (shift <= -bits) {
        /* Rounding the sign bit always produces 0. */
        if (round) {
            return 0;
        }
        /* Everything but the sign is shifted out: 0 or -1. */
        return src >> 31;
    } else if (shift < 0) {
        if (round) {
            /* Stop one bit short, then add that bit back in as the round. */
            src >>= -shift - 1;
            return (src >> 1) + (src & 1);
        }
        return src >> -shift;
    } else if (shift < bits) {
        /*
         * Shift in the unsigned domain: left-shifting a negative value,
         * or shifting a set bit into or past the sign bit, is undefined
         * for a signed type.
         */
        int32_t val = (int32_t)((uint32_t)src << shift);
        if (bits == 32) {
            /* No overflow iff shifting back recovers the input. */
            if (!sat || val >> shift == src) {
                return val;
            }
        } else {
            int32_t extval = sextract32(val, 0, bits);
            /* No overflow iff the value survives truncation to @bits. */
            if (!sat || val == extval) {
                return extval;
            }
        }
    } else if (!sat || src == 0) {
        /* Shifted entirely out of the element. */
        return 0;
    }

    *sat = 1;
    return (1u << (bits - 1)) - (src >= 0);
}
91 
/*
 * Unsigned shift of an element of width @bits (up to 32) by a signed
 * amount, with optional rounding and optional saturation.
 *
 * A negative @shift is a right shift; with @round set, the last bit
 * shifted out is added to the result.  A non-negative @shift is a left
 * shift.  When @sat is non-NULL and the left shift loses set bits,
 * *@sat is set to 1 and the all-ones value of width @bits is returned;
 * when @sat is NULL the result is truncated to @bits bits instead.
 */
static inline uint32_t do_uqrshl_bhs(uint32_t src, int32_t shift, int bits,
                                     bool round, uint32_t *sat)
{
    uint32_t result = 0;
    bool overflow;

    /* Right shift far enough that even the rounding bit is zero. */
    if (shift <= -(bits + round)) {
        return 0;
    }

    if (shift < 0) {
        if (!round) {
            return src >> -shift;
        }
        /* Keep one extra bit, then fold it in as the rounding increment. */
        src >>= -shift - 1;
        return (src >> 1) + (src & 1);
    }

    if (shift >= bits) {
        /* Every bit leaves the element; only zero survives intact. */
        overflow = src != 0;
    } else {
        uint32_t shifted = src << shift;

        if (bits == 32) {
            result = shifted;
            overflow = (shifted >> shift) != src;
        } else {
            result = extract32(shifted, 0, bits);
            overflow = shifted != result;
        }
    }

    if (sat && overflow) {
        *sat = 1;
        return MAKE_64BIT_MASK(0, bits);
    }
    return result;
}
122 
/*
 * Shift a signed input and produce an unsigned result.
 *
 * With @sat non-NULL, a negative @src sets *@sat to 1 and yields 0.
 * Every other case is passed unchanged to do_uqrshl_bhs().
 */
static inline int32_t do_suqrshl_bhs(int32_t src, int32_t shift, int bits,
                                     bool round, uint32_t *sat)
{
    if (!sat || src >= 0) {
        return do_uqrshl_bhs(src, shift, bits, round, sat);
    }

    /* Negative input with saturation requested: clamp to zero. */
    *sat = 1;
    return 0;
}
132 
/*
 * Signed 64-bit shift by a signed amount, with optional rounding and
 * optional saturation.
 *
 * @src:   value to shift
 * @shift: shift count; positive shifts left, negative shifts right
 * @round: for right shifts, add the last bit shifted out to the result
 * @sat:   if non-NULL, a left shift that overflows sets *@sat to 1 and
 *         returns INT64_MIN or INT64_MAX according to the sign of @src;
 *         if NULL, the overflowing result simply wraps
 */
static inline int64_t do_sqrshl_d(int64_t src, int64_t shift,
                                  bool round, uint32_t *sat)
{
    if (shift <= -64) {
        /* Rounding the sign bit always produces 0. */
        if (round) {
            return 0;
        }
        /* Everything but the sign is shifted out: 0 or -1. */
        return src >> 63;
    } else if (shift < 0) {
        if (round) {
            /* Stop one bit short, then add that bit back in as the round. */
            src >>= -shift - 1;
            return (src >> 1) + (src & 1);
        }
        return src >> -shift;
    } else if (shift < 64) {
        /*
         * Shift in the unsigned domain: left-shifting a negative value,
         * or shifting a set bit into or past the sign bit, is undefined
         * for a signed type.
         */
        int64_t val = (int64_t)((uint64_t)src << shift);
        /* No overflow iff shifting back recovers the input. */
        if (!sat || val >> shift == src) {
            return val;
        }
    } else if (!sat || src == 0) {
        /* Shifted entirely out of the element. */
        return 0;
    }

    *sat = 1;
    return src < 0 ? INT64_MIN : INT64_MAX;
}
160 
/*
 * Unsigned 64-bit shift by a signed amount, with optional rounding and
 * optional saturation.
 *
 * A negative @shift is a right shift; with @round set, the last bit
 * shifted out is added to the result.  A non-negative @shift is a left
 * shift.  When @sat is non-NULL and the left shift loses set bits,
 * *@sat is set to 1 and UINT64_MAX is returned; when @sat is NULL the
 * wrapped result is returned instead.
 */
static inline uint64_t do_uqrshl_d(uint64_t src, int64_t shift,
                                   bool round, uint32_t *sat)
{
    uint64_t result = 0;
    bool overflow;

    /* Right shift far enough that even the rounding bit is zero. */
    if (shift <= -(64 + round)) {
        return 0;
    }

    if (shift < 0) {
        if (!round) {
            return src >> -shift;
        }
        /* Keep one extra bit, then fold it in as the rounding increment. */
        src >>= -shift - 1;
        return (src >> 1) + (src & 1);
    }

    if (shift >= 64) {
        /* Every bit leaves the element; only zero survives intact. */
        overflow = src != 0;
    } else {
        result = src << shift;
        overflow = (result >> shift) != src;
    }

    if (sat && overflow) {
        *sat = 1;
        return UINT64_MAX;
    }
    return result;
}
184 
/*
 * Shift a signed 64-bit input and produce an unsigned result.
 *
 * With @sat non-NULL, a negative @src sets *@sat to 1 and yields 0.
 * Every other case is passed unchanged to do_uqrshl_d().
 */
static inline int64_t do_suqrshl_d(int64_t src, int64_t shift,
                                   bool round, uint32_t *sat)
{
    if (!sat || src >= 0) {
        return do_uqrshl_d(src, shift, round, sat);
    }

    /* Negative input with saturation requested: clamp to zero. */
    *sat = 1;
    return 0;
}
194 
/*
 * Out-of-line helpers; their definitions are not part of this header.
 * There is one variant per element size: _b, _h, _s and _d take three
 * operands of, and return, int8_t, int16_t, int32_t and int64_t
 * respectively, followed by two bool flags.  Only the _h and _s
 * variants take a trailing uint32_t pointer.
 *
 * NOTE(review): by analogy with the shift helpers in this file the
 * pointer is presumably a saturation flag, and the name suggests a
 * saturating rounding doubling multiply-accumulate; the meaning of the
 * two bool flags cannot be determined from here -- confirm against the
 * definitions.
 */
int8_t do_sqrdmlah_b(int8_t, int8_t, int8_t, bool, bool);
int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *);
int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *);
int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);
199 
200 #endif /* TARGET_ARM_VEC_INTERNALS_H */
201