xref: /qemu/accel/tcg/tcg-runtime-gvec.c (revision 4221aa4a882403fc7d6d22ad6af9a58c8a5badf6)
/*
 * Generic vectorized operation runtime
 *
 * Copyright (c) 2018 Linaro
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19db432672SRichard Henderson 
20db432672SRichard Henderson #include "qemu/osdep.h"
21db432672SRichard Henderson #include "qemu/host-utils.h"
22db432672SRichard Henderson #include "cpu.h"
23db432672SRichard Henderson #include "exec/helper-proto.h"
24dcb32f1dSPhilippe Mathieu-Daudé #include "tcg/tcg-gvec-desc.h"
25db432672SRichard Henderson 
26db432672SRichard Henderson 
/*
 * Zero the part of the destination that lies beyond the operation size.
 *
 * @d:     base of the destination vector
 * @oprsz: number of bytes the caller has already produced
 * @desc:  descriptor from which the maximum size, simd_maxsz(), is read
 *
 * When the maximum size exceeds @oprsz, the bytes from @oprsz up to the
 * maximum size are cleared in uint64_t units; otherwise nothing is written.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    const intptr_t total = simd_maxsz(desc);
    intptr_t off = oprsz;

    /* Common case: the operation already covers the whole vector. */
    if (!unlikely(total > oprsz)) {
        return;
    }
    while (off < total) {
        *(uint64_t *)(d + off) = 0;
        off += sizeof(uint64_t);
    }
}
38db432672SRichard Henderson 
39db432672SRichard Henderson void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
40db432672SRichard Henderson {
41db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
42db432672SRichard Henderson     intptr_t i;
43db432672SRichard Henderson 
446c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
456c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
46db432672SRichard Henderson     }
47db432672SRichard Henderson     clear_high(d, oprsz, desc);
48db432672SRichard Henderson }
49db432672SRichard Henderson 
50db432672SRichard Henderson void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
51db432672SRichard Henderson {
52db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
53db432672SRichard Henderson     intptr_t i;
54db432672SRichard Henderson 
556c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
566c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
57db432672SRichard Henderson     }
58db432672SRichard Henderson     clear_high(d, oprsz, desc);
59db432672SRichard Henderson }
60db432672SRichard Henderson 
61db432672SRichard Henderson void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
62db432672SRichard Henderson {
63db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
64db432672SRichard Henderson     intptr_t i;
65db432672SRichard Henderson 
666c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
676c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
68db432672SRichard Henderson     }
69db432672SRichard Henderson     clear_high(d, oprsz, desc);
70db432672SRichard Henderson }
71db432672SRichard Henderson 
72db432672SRichard Henderson void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
73db432672SRichard Henderson {
74db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
75db432672SRichard Henderson     intptr_t i;
76db432672SRichard Henderson 
776c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
786c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
79db432672SRichard Henderson     }
80db432672SRichard Henderson     clear_high(d, oprsz, desc);
81db432672SRichard Henderson }
82db432672SRichard Henderson 
8322fc3527SRichard Henderson void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
8422fc3527SRichard Henderson {
8522fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8622fc3527SRichard Henderson     intptr_t i;
8722fc3527SRichard Henderson 
886c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
890a83e43aSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b;
9022fc3527SRichard Henderson     }
9122fc3527SRichard Henderson     clear_high(d, oprsz, desc);
9222fc3527SRichard Henderson }
9322fc3527SRichard Henderson 
9422fc3527SRichard Henderson void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
9522fc3527SRichard Henderson {
9622fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
9722fc3527SRichard Henderson     intptr_t i;
9822fc3527SRichard Henderson 
996c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1000a83e43aSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b;
10122fc3527SRichard Henderson     }
10222fc3527SRichard Henderson     clear_high(d, oprsz, desc);
10322fc3527SRichard Henderson }
10422fc3527SRichard Henderson 
10522fc3527SRichard Henderson void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
10622fc3527SRichard Henderson {
10722fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
10822fc3527SRichard Henderson     intptr_t i;
10922fc3527SRichard Henderson 
1106c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1110a83e43aSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b;
11222fc3527SRichard Henderson     }
11322fc3527SRichard Henderson     clear_high(d, oprsz, desc);
11422fc3527SRichard Henderson }
11522fc3527SRichard Henderson 
11622fc3527SRichard Henderson void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
11722fc3527SRichard Henderson {
11822fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
11922fc3527SRichard Henderson     intptr_t i;
12022fc3527SRichard Henderson 
1216c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1220a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b;
12322fc3527SRichard Henderson     }
12422fc3527SRichard Henderson     clear_high(d, oprsz, desc);
12522fc3527SRichard Henderson }
12622fc3527SRichard Henderson 
127db432672SRichard Henderson void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
128db432672SRichard Henderson {
129db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
130db432672SRichard Henderson     intptr_t i;
131db432672SRichard Henderson 
1326c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1336c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
134db432672SRichard Henderson     }
135db432672SRichard Henderson     clear_high(d, oprsz, desc);
136db432672SRichard Henderson }
137db432672SRichard Henderson 
138db432672SRichard Henderson void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
139db432672SRichard Henderson {
140db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
141db432672SRichard Henderson     intptr_t i;
142db432672SRichard Henderson 
1436c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1446c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
145db432672SRichard Henderson     }
146db432672SRichard Henderson     clear_high(d, oprsz, desc);
147db432672SRichard Henderson }
148db432672SRichard Henderson 
149db432672SRichard Henderson void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
150db432672SRichard Henderson {
151db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
152db432672SRichard Henderson     intptr_t i;
153db432672SRichard Henderson 
1546c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1556c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
156db432672SRichard Henderson     }
157db432672SRichard Henderson     clear_high(d, oprsz, desc);
158db432672SRichard Henderson }
159db432672SRichard Henderson 
160db432672SRichard Henderson void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
161db432672SRichard Henderson {
162db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
163db432672SRichard Henderson     intptr_t i;
164db432672SRichard Henderson 
1656c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1666c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
167db432672SRichard Henderson     }
168db432672SRichard Henderson     clear_high(d, oprsz, desc);
169db432672SRichard Henderson }
170db432672SRichard Henderson 
17122fc3527SRichard Henderson void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
17222fc3527SRichard Henderson {
17322fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
17422fc3527SRichard Henderson     intptr_t i;
17522fc3527SRichard Henderson 
1766c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1770a83e43aSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b;
17822fc3527SRichard Henderson     }
17922fc3527SRichard Henderson     clear_high(d, oprsz, desc);
18022fc3527SRichard Henderson }
18122fc3527SRichard Henderson 
18222fc3527SRichard Henderson void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
18322fc3527SRichard Henderson {
18422fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
18522fc3527SRichard Henderson     intptr_t i;
18622fc3527SRichard Henderson 
1876c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1880a83e43aSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b;
18922fc3527SRichard Henderson     }
19022fc3527SRichard Henderson     clear_high(d, oprsz, desc);
19122fc3527SRichard Henderson }
19222fc3527SRichard Henderson 
19322fc3527SRichard Henderson void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
19422fc3527SRichard Henderson {
19522fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
19622fc3527SRichard Henderson     intptr_t i;
19722fc3527SRichard Henderson 
1986c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1990a83e43aSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b;
20022fc3527SRichard Henderson     }
20122fc3527SRichard Henderson     clear_high(d, oprsz, desc);
20222fc3527SRichard Henderson }
20322fc3527SRichard Henderson 
20422fc3527SRichard Henderson void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
20522fc3527SRichard Henderson {
20622fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
20722fc3527SRichard Henderson     intptr_t i;
20822fc3527SRichard Henderson 
2096c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
2100a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b;
21122fc3527SRichard Henderson     }
21222fc3527SRichard Henderson     clear_high(d, oprsz, desc);
21322fc3527SRichard Henderson }
21422fc3527SRichard Henderson 
2153774030aSRichard Henderson void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
2163774030aSRichard Henderson {
2173774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2183774030aSRichard Henderson     intptr_t i;
2193774030aSRichard Henderson 
2206c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
2216c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
2223774030aSRichard Henderson     }
2233774030aSRichard Henderson     clear_high(d, oprsz, desc);
2243774030aSRichard Henderson }
2253774030aSRichard Henderson 
2263774030aSRichard Henderson void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
2273774030aSRichard Henderson {
2283774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2293774030aSRichard Henderson     intptr_t i;
2303774030aSRichard Henderson 
2316c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
2326c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
2333774030aSRichard Henderson     }
2343774030aSRichard Henderson     clear_high(d, oprsz, desc);
2353774030aSRichard Henderson }
2363774030aSRichard Henderson 
2373774030aSRichard Henderson void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
2383774030aSRichard Henderson {
2393774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2403774030aSRichard Henderson     intptr_t i;
2413774030aSRichard Henderson 
2426c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
2436c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
2443774030aSRichard Henderson     }
2453774030aSRichard Henderson     clear_high(d, oprsz, desc);
2463774030aSRichard Henderson }
2473774030aSRichard Henderson 
2483774030aSRichard Henderson void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
2493774030aSRichard Henderson {
2503774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2513774030aSRichard Henderson     intptr_t i;
2523774030aSRichard Henderson 
2536c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
2546c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
2553774030aSRichard Henderson     }
2563774030aSRichard Henderson     clear_high(d, oprsz, desc);
2573774030aSRichard Henderson }
2583774030aSRichard Henderson 
25922fc3527SRichard Henderson void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
26022fc3527SRichard Henderson {
26122fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
26222fc3527SRichard Henderson     intptr_t i;
26322fc3527SRichard Henderson 
2646c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
2650a83e43aSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b;
26622fc3527SRichard Henderson     }
26722fc3527SRichard Henderson     clear_high(d, oprsz, desc);
26822fc3527SRichard Henderson }
26922fc3527SRichard Henderson 
27022fc3527SRichard Henderson void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
27122fc3527SRichard Henderson {
27222fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
27322fc3527SRichard Henderson     intptr_t i;
27422fc3527SRichard Henderson 
2756c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
2760a83e43aSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b;
27722fc3527SRichard Henderson     }
27822fc3527SRichard Henderson     clear_high(d, oprsz, desc);
27922fc3527SRichard Henderson }
28022fc3527SRichard Henderson 
28122fc3527SRichard Henderson void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
28222fc3527SRichard Henderson {
28322fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
28422fc3527SRichard Henderson     intptr_t i;
28522fc3527SRichard Henderson 
2866c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
2870a83e43aSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b;
28822fc3527SRichard Henderson     }
28922fc3527SRichard Henderson     clear_high(d, oprsz, desc);
29022fc3527SRichard Henderson }
29122fc3527SRichard Henderson 
29222fc3527SRichard Henderson void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
29322fc3527SRichard Henderson {
29422fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
29522fc3527SRichard Henderson     intptr_t i;
29622fc3527SRichard Henderson 
2976c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
2980a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b;
29922fc3527SRichard Henderson     }
30022fc3527SRichard Henderson     clear_high(d, oprsz, desc);
30122fc3527SRichard Henderson }
30222fc3527SRichard Henderson 
303db432672SRichard Henderson void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
304db432672SRichard Henderson {
305db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
306db432672SRichard Henderson     intptr_t i;
307db432672SRichard Henderson 
3086c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
3096c7ab301SRichard Henderson         *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
310db432672SRichard Henderson     }
311db432672SRichard Henderson     clear_high(d, oprsz, desc);
312db432672SRichard Henderson }
313db432672SRichard Henderson 
314db432672SRichard Henderson void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
315db432672SRichard Henderson {
316db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
317db432672SRichard Henderson     intptr_t i;
318db432672SRichard Henderson 
3196c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
3206c7ab301SRichard Henderson         *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
321db432672SRichard Henderson     }
322db432672SRichard Henderson     clear_high(d, oprsz, desc);
323db432672SRichard Henderson }
324db432672SRichard Henderson 
325db432672SRichard Henderson void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
326db432672SRichard Henderson {
327db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
328db432672SRichard Henderson     intptr_t i;
329db432672SRichard Henderson 
3306c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
3316c7ab301SRichard Henderson         *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
332db432672SRichard Henderson     }
333db432672SRichard Henderson     clear_high(d, oprsz, desc);
334db432672SRichard Henderson }
335db432672SRichard Henderson 
336db432672SRichard Henderson void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
337db432672SRichard Henderson {
338db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
339db432672SRichard Henderson     intptr_t i;
340db432672SRichard Henderson 
3416c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
3426c7ab301SRichard Henderson         *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
343db432672SRichard Henderson     }
344db432672SRichard Henderson     clear_high(d, oprsz, desc);
345db432672SRichard Henderson }
346db432672SRichard Henderson 
347bcefc902SRichard Henderson void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
348bcefc902SRichard Henderson {
349bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
350bcefc902SRichard Henderson     intptr_t i;
351bcefc902SRichard Henderson 
352bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
353bcefc902SRichard Henderson         int8_t aa = *(int8_t *)(a + i);
354bcefc902SRichard Henderson         *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
355bcefc902SRichard Henderson     }
356bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
357bcefc902SRichard Henderson }
358bcefc902SRichard Henderson 
359bcefc902SRichard Henderson void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
360bcefc902SRichard Henderson {
361bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
362bcefc902SRichard Henderson     intptr_t i;
363bcefc902SRichard Henderson 
364bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
365bcefc902SRichard Henderson         int16_t aa = *(int16_t *)(a + i);
366bcefc902SRichard Henderson         *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
367bcefc902SRichard Henderson     }
368bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
369bcefc902SRichard Henderson }
370bcefc902SRichard Henderson 
371bcefc902SRichard Henderson void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
372bcefc902SRichard Henderson {
373bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
374bcefc902SRichard Henderson     intptr_t i;
375bcefc902SRichard Henderson 
376bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
377bcefc902SRichard Henderson         int32_t aa = *(int32_t *)(a + i);
378bcefc902SRichard Henderson         *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
379bcefc902SRichard Henderson     }
380bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
381bcefc902SRichard Henderson }
382bcefc902SRichard Henderson 
383bcefc902SRichard Henderson void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
384bcefc902SRichard Henderson {
385bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
386bcefc902SRichard Henderson     intptr_t i;
387bcefc902SRichard Henderson 
388bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
389bcefc902SRichard Henderson         int64_t aa = *(int64_t *)(a + i);
390bcefc902SRichard Henderson         *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
391bcefc902SRichard Henderson     }
392bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
393bcefc902SRichard Henderson }
394bcefc902SRichard Henderson 
395db432672SRichard Henderson void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
396db432672SRichard Henderson {
397db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
398db432672SRichard Henderson 
399db432672SRichard Henderson     memcpy(d, a, oprsz);
400db432672SRichard Henderson     clear_high(d, oprsz, desc);
401db432672SRichard Henderson }
402db432672SRichard Henderson 
403db432672SRichard Henderson void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
404db432672SRichard Henderson {
405db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
406db432672SRichard Henderson     intptr_t i;
407db432672SRichard Henderson 
408db432672SRichard Henderson     if (c == 0) {
409db432672SRichard Henderson         oprsz = 0;
410db432672SRichard Henderson     } else {
411db432672SRichard Henderson         for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
412db432672SRichard Henderson             *(uint64_t *)(d + i) = c;
413db432672SRichard Henderson         }
414db432672SRichard Henderson     }
415db432672SRichard Henderson     clear_high(d, oprsz, desc);
416db432672SRichard Henderson }
417db432672SRichard Henderson 
418db432672SRichard Henderson void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
419db432672SRichard Henderson {
420db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
421db432672SRichard Henderson     intptr_t i;
422db432672SRichard Henderson 
423db432672SRichard Henderson     if (c == 0) {
424db432672SRichard Henderson         oprsz = 0;
425db432672SRichard Henderson     } else {
426db432672SRichard Henderson         for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
427db432672SRichard Henderson             *(uint32_t *)(d + i) = c;
428db432672SRichard Henderson         }
429db432672SRichard Henderson     }
430db432672SRichard Henderson     clear_high(d, oprsz, desc);
431db432672SRichard Henderson }
432db432672SRichard Henderson 
433db432672SRichard Henderson void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
434db432672SRichard Henderson {
435db432672SRichard Henderson     HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
436db432672SRichard Henderson }
437db432672SRichard Henderson 
438db432672SRichard Henderson void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
439db432672SRichard Henderson {
440db432672SRichard Henderson     HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
441db432672SRichard Henderson }
442db432672SRichard Henderson 
443db432672SRichard Henderson void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
444db432672SRichard Henderson {
445db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
446db432672SRichard Henderson     intptr_t i;
447db432672SRichard Henderson 
4486c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4496c7ab301SRichard Henderson         *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
450db432672SRichard Henderson     }
451db432672SRichard Henderson     clear_high(d, oprsz, desc);
452db432672SRichard Henderson }
453db432672SRichard Henderson 
454db432672SRichard Henderson void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
455db432672SRichard Henderson {
456db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
457db432672SRichard Henderson     intptr_t i;
458db432672SRichard Henderson 
4596c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4606c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
461db432672SRichard Henderson     }
462db432672SRichard Henderson     clear_high(d, oprsz, desc);
463db432672SRichard Henderson }
464db432672SRichard Henderson 
465db432672SRichard Henderson void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
466db432672SRichard Henderson {
467db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
468db432672SRichard Henderson     intptr_t i;
469db432672SRichard Henderson 
4706c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4716c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
472db432672SRichard Henderson     }
473db432672SRichard Henderson     clear_high(d, oprsz, desc);
474db432672SRichard Henderson }
475db432672SRichard Henderson 
476db432672SRichard Henderson void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
477db432672SRichard Henderson {
478db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
479db432672SRichard Henderson     intptr_t i;
480db432672SRichard Henderson 
4816c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4826c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
483db432672SRichard Henderson     }
484db432672SRichard Henderson     clear_high(d, oprsz, desc);
485db432672SRichard Henderson }
486db432672SRichard Henderson 
487db432672SRichard Henderson void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
488db432672SRichard Henderson {
489db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
490db432672SRichard Henderson     intptr_t i;
491db432672SRichard Henderson 
4926c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4936c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
494db432672SRichard Henderson     }
495db432672SRichard Henderson     clear_high(d, oprsz, desc);
496db432672SRichard Henderson }
497db432672SRichard Henderson 
498db432672SRichard Henderson void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
499db432672SRichard Henderson {
500db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
501db432672SRichard Henderson     intptr_t i;
502db432672SRichard Henderson 
5036c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5046c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
505db432672SRichard Henderson     }
506db432672SRichard Henderson     clear_high(d, oprsz, desc);
507db432672SRichard Henderson }
508d0ec9796SRichard Henderson 
509f550805dSRichard Henderson void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
510f550805dSRichard Henderson {
511f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
512f550805dSRichard Henderson     intptr_t i;
513f550805dSRichard Henderson 
5146c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5156c7ab301SRichard Henderson         *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
516f550805dSRichard Henderson     }
517f550805dSRichard Henderson     clear_high(d, oprsz, desc);
518f550805dSRichard Henderson }
519f550805dSRichard Henderson 
520f550805dSRichard Henderson void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
521f550805dSRichard Henderson {
522f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
523f550805dSRichard Henderson     intptr_t i;
524f550805dSRichard Henderson 
5256c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5266c7ab301SRichard Henderson         *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
527f550805dSRichard Henderson     }
528f550805dSRichard Henderson     clear_high(d, oprsz, desc);
529f550805dSRichard Henderson }
530f550805dSRichard Henderson 
531f550805dSRichard Henderson void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
532f550805dSRichard Henderson {
533f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
534f550805dSRichard Henderson     intptr_t i;
535f550805dSRichard Henderson 
5366c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5376c7ab301SRichard Henderson         *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
538f550805dSRichard Henderson     }
539f550805dSRichard Henderson     clear_high(d, oprsz, desc);
540f550805dSRichard Henderson }
541f550805dSRichard Henderson 
54222fc3527SRichard Henderson void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
54322fc3527SRichard Henderson {
54422fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
54522fc3527SRichard Henderson     intptr_t i;
54622fc3527SRichard Henderson 
5476c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5480a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b;
54922fc3527SRichard Henderson     }
55022fc3527SRichard Henderson     clear_high(d, oprsz, desc);
55122fc3527SRichard Henderson }
55222fc3527SRichard Henderson 
553*4221aa4aSNazar Kazakov void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc)
554*4221aa4aSNazar Kazakov {
555*4221aa4aSNazar Kazakov     intptr_t oprsz = simd_oprsz(desc);
556*4221aa4aSNazar Kazakov     intptr_t i;
557*4221aa4aSNazar Kazakov 
558*4221aa4aSNazar Kazakov     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
559*4221aa4aSNazar Kazakov         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b;
560*4221aa4aSNazar Kazakov     }
561*4221aa4aSNazar Kazakov     clear_high(d, oprsz, desc);
562*4221aa4aSNazar Kazakov }
563*4221aa4aSNazar Kazakov 
56422fc3527SRichard Henderson void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
56522fc3527SRichard Henderson {
56622fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
56722fc3527SRichard Henderson     intptr_t i;
56822fc3527SRichard Henderson 
5696c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5700a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b;
57122fc3527SRichard Henderson     }
57222fc3527SRichard Henderson     clear_high(d, oprsz, desc);
57322fc3527SRichard Henderson }
57422fc3527SRichard Henderson 
57522fc3527SRichard Henderson void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
57622fc3527SRichard Henderson {
57722fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
57822fc3527SRichard Henderson     intptr_t i;
57922fc3527SRichard Henderson 
5806c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5810a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b;
58222fc3527SRichard Henderson     }
58322fc3527SRichard Henderson     clear_high(d, oprsz, desc);
58422fc3527SRichard Henderson }
58522fc3527SRichard Henderson 
586d0ec9796SRichard Henderson void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
587d0ec9796SRichard Henderson {
588d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
589d0ec9796SRichard Henderson     int shift = simd_data(desc);
590d0ec9796SRichard Henderson     intptr_t i;
591d0ec9796SRichard Henderson 
5926c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
5936c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
594d0ec9796SRichard Henderson     }
595d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
596d0ec9796SRichard Henderson }
597d0ec9796SRichard Henderson 
598d0ec9796SRichard Henderson void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
599d0ec9796SRichard Henderson {
600d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
601d0ec9796SRichard Henderson     int shift = simd_data(desc);
602d0ec9796SRichard Henderson     intptr_t i;
603d0ec9796SRichard Henderson 
6046c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
6056c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
606d0ec9796SRichard Henderson     }
607d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
608d0ec9796SRichard Henderson }
609d0ec9796SRichard Henderson 
610d0ec9796SRichard Henderson void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
611d0ec9796SRichard Henderson {
612d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
613d0ec9796SRichard Henderson     int shift = simd_data(desc);
614d0ec9796SRichard Henderson     intptr_t i;
615d0ec9796SRichard Henderson 
6166c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
6176c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
618d0ec9796SRichard Henderson     }
619d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
620d0ec9796SRichard Henderson }
621d0ec9796SRichard Henderson 
622d0ec9796SRichard Henderson void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
623d0ec9796SRichard Henderson {
624d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
625d0ec9796SRichard Henderson     int shift = simd_data(desc);
626d0ec9796SRichard Henderson     intptr_t i;
627d0ec9796SRichard Henderson 
6286c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
6296c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
630d0ec9796SRichard Henderson     }
631d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
632d0ec9796SRichard Henderson }
633d0ec9796SRichard Henderson 
634d0ec9796SRichard Henderson void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
635d0ec9796SRichard Henderson {
636d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
637d0ec9796SRichard Henderson     int shift = simd_data(desc);
638d0ec9796SRichard Henderson     intptr_t i;
639d0ec9796SRichard Henderson 
6406c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
6416c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
642d0ec9796SRichard Henderson     }
643d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
644d0ec9796SRichard Henderson }
645d0ec9796SRichard Henderson 
646d0ec9796SRichard Henderson void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
647d0ec9796SRichard Henderson {
648d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
649d0ec9796SRichard Henderson     int shift = simd_data(desc);
650d0ec9796SRichard Henderson     intptr_t i;
651d0ec9796SRichard Henderson 
6526c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
6536c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
654d0ec9796SRichard Henderson     }
655d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
656d0ec9796SRichard Henderson }
657d0ec9796SRichard Henderson 
658d0ec9796SRichard Henderson void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
659d0ec9796SRichard Henderson {
660d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
661d0ec9796SRichard Henderson     int shift = simd_data(desc);
662d0ec9796SRichard Henderson     intptr_t i;
663d0ec9796SRichard Henderson 
6646c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
6656c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
666d0ec9796SRichard Henderson     }
667d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
668d0ec9796SRichard Henderson }
669d0ec9796SRichard Henderson 
670d0ec9796SRichard Henderson void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
671d0ec9796SRichard Henderson {
672d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
673d0ec9796SRichard Henderson     int shift = simd_data(desc);
674d0ec9796SRichard Henderson     intptr_t i;
675d0ec9796SRichard Henderson 
6766c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
6776c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
678d0ec9796SRichard Henderson     }
679d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
680d0ec9796SRichard Henderson }
681d0ec9796SRichard Henderson 
682d0ec9796SRichard Henderson void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
683d0ec9796SRichard Henderson {
684d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
685d0ec9796SRichard Henderson     int shift = simd_data(desc);
686d0ec9796SRichard Henderson     intptr_t i;
687d0ec9796SRichard Henderson 
6886c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
6896c7ab301SRichard Henderson         *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
690d0ec9796SRichard Henderson     }
691d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
692d0ec9796SRichard Henderson }
693d0ec9796SRichard Henderson 
694d0ec9796SRichard Henderson void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
695d0ec9796SRichard Henderson {
696d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
697d0ec9796SRichard Henderson     int shift = simd_data(desc);
698d0ec9796SRichard Henderson     intptr_t i;
699d0ec9796SRichard Henderson 
7006c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
7016c7ab301SRichard Henderson         *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
702d0ec9796SRichard Henderson     }
703d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
704d0ec9796SRichard Henderson }
705d0ec9796SRichard Henderson 
706d0ec9796SRichard Henderson void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
707d0ec9796SRichard Henderson {
708d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
709d0ec9796SRichard Henderson     int shift = simd_data(desc);
710d0ec9796SRichard Henderson     intptr_t i;
711d0ec9796SRichard Henderson 
7126c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
7136c7ab301SRichard Henderson         *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
714d0ec9796SRichard Henderson     }
715d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
716d0ec9796SRichard Henderson }
717d0ec9796SRichard Henderson 
718d0ec9796SRichard Henderson void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
719d0ec9796SRichard Henderson {
720d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
721d0ec9796SRichard Henderson     int shift = simd_data(desc);
722d0ec9796SRichard Henderson     intptr_t i;
723d0ec9796SRichard Henderson 
7246c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
7256c7ab301SRichard Henderson         *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
726d0ec9796SRichard Henderson     }
727d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
728d0ec9796SRichard Henderson }
729212be173SRichard Henderson 
730b0f7e744SRichard Henderson void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
731b0f7e744SRichard Henderson {
732b0f7e744SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
733b0f7e744SRichard Henderson     int shift = simd_data(desc);
734b0f7e744SRichard Henderson     intptr_t i;
735b0f7e744SRichard Henderson 
736b0f7e744SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
737b0f7e744SRichard Henderson         *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
738b0f7e744SRichard Henderson     }
739b0f7e744SRichard Henderson     clear_high(d, oprsz, desc);
740b0f7e744SRichard Henderson }
741b0f7e744SRichard Henderson 
742b0f7e744SRichard Henderson void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
743b0f7e744SRichard Henderson {
744b0f7e744SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
745b0f7e744SRichard Henderson     int shift = simd_data(desc);
746b0f7e744SRichard Henderson     intptr_t i;
747b0f7e744SRichard Henderson 
748b0f7e744SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
749b0f7e744SRichard Henderson         *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
750b0f7e744SRichard Henderson     }
751b0f7e744SRichard Henderson     clear_high(d, oprsz, desc);
752b0f7e744SRichard Henderson }
753b0f7e744SRichard Henderson 
754b0f7e744SRichard Henderson void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
755b0f7e744SRichard Henderson {
756b0f7e744SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
757b0f7e744SRichard Henderson     int shift = simd_data(desc);
758b0f7e744SRichard Henderson     intptr_t i;
759b0f7e744SRichard Henderson 
760b0f7e744SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
761b0f7e744SRichard Henderson         *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
762b0f7e744SRichard Henderson     }
763b0f7e744SRichard Henderson     clear_high(d, oprsz, desc);
764b0f7e744SRichard Henderson }
765b0f7e744SRichard Henderson 
766b0f7e744SRichard Henderson void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
767b0f7e744SRichard Henderson {
768b0f7e744SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
769b0f7e744SRichard Henderson     int shift = simd_data(desc);
770b0f7e744SRichard Henderson     intptr_t i;
771b0f7e744SRichard Henderson 
772b0f7e744SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
773b0f7e744SRichard Henderson         *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
774b0f7e744SRichard Henderson     }
775b0f7e744SRichard Henderson     clear_high(d, oprsz, desc);
776b0f7e744SRichard Henderson }
777b0f7e744SRichard Henderson 
7785ee5c14cSRichard Henderson void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
7795ee5c14cSRichard Henderson {
7805ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
7815ee5c14cSRichard Henderson     intptr_t i;
7825ee5c14cSRichard Henderson 
7835ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
7845ee5c14cSRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
7855ee5c14cSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
7865ee5c14cSRichard Henderson     }
7875ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
7885ee5c14cSRichard Henderson }
7895ee5c14cSRichard Henderson 
7905ee5c14cSRichard Henderson void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
7915ee5c14cSRichard Henderson {
7925ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
7935ee5c14cSRichard Henderson     intptr_t i;
7945ee5c14cSRichard Henderson 
7955ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
7965ee5c14cSRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
7975ee5c14cSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
7985ee5c14cSRichard Henderson     }
7995ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8005ee5c14cSRichard Henderson }
8015ee5c14cSRichard Henderson 
8025ee5c14cSRichard Henderson void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
8035ee5c14cSRichard Henderson {
8045ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8055ee5c14cSRichard Henderson     intptr_t i;
8065ee5c14cSRichard Henderson 
8075ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
8085ee5c14cSRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
8095ee5c14cSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
8105ee5c14cSRichard Henderson     }
8115ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8125ee5c14cSRichard Henderson }
8135ee5c14cSRichard Henderson 
8145ee5c14cSRichard Henderson void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
8155ee5c14cSRichard Henderson {
8165ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8175ee5c14cSRichard Henderson     intptr_t i;
8185ee5c14cSRichard Henderson 
8195ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
8205ee5c14cSRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
8215ee5c14cSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
8225ee5c14cSRichard Henderson     }
8235ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8245ee5c14cSRichard Henderson }
8255ee5c14cSRichard Henderson 
8265ee5c14cSRichard Henderson void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
8275ee5c14cSRichard Henderson {
8285ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8295ee5c14cSRichard Henderson     intptr_t i;
8305ee5c14cSRichard Henderson 
8315ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
8325ee5c14cSRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
8335ee5c14cSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
8345ee5c14cSRichard Henderson     }
8355ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8365ee5c14cSRichard Henderson }
8375ee5c14cSRichard Henderson 
8385ee5c14cSRichard Henderson void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
8395ee5c14cSRichard Henderson {
8405ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8415ee5c14cSRichard Henderson     intptr_t i;
8425ee5c14cSRichard Henderson 
8435ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
8445ee5c14cSRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
8455ee5c14cSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
8465ee5c14cSRichard Henderson     }
8475ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8485ee5c14cSRichard Henderson }
8495ee5c14cSRichard Henderson 
8505ee5c14cSRichard Henderson void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
8515ee5c14cSRichard Henderson {
8525ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8535ee5c14cSRichard Henderson     intptr_t i;
8545ee5c14cSRichard Henderson 
8555ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
8565ee5c14cSRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
8575ee5c14cSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
8585ee5c14cSRichard Henderson     }
8595ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8605ee5c14cSRichard Henderson }
8615ee5c14cSRichard Henderson 
8625ee5c14cSRichard Henderson void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
8635ee5c14cSRichard Henderson {
8645ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8655ee5c14cSRichard Henderson     intptr_t i;
8665ee5c14cSRichard Henderson 
8675ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
8685ee5c14cSRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
8695ee5c14cSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
8705ee5c14cSRichard Henderson     }
8715ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8725ee5c14cSRichard Henderson }
8735ee5c14cSRichard Henderson 
8745ee5c14cSRichard Henderson void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
8755ee5c14cSRichard Henderson {
8765ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8775ee5c14cSRichard Henderson     intptr_t i;
8785ee5c14cSRichard Henderson 
879899f08adSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
8805ee5c14cSRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
8815ee5c14cSRichard Henderson         *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
8825ee5c14cSRichard Henderson     }
8835ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8845ee5c14cSRichard Henderson }
8855ee5c14cSRichard Henderson 
8865ee5c14cSRichard Henderson void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
8875ee5c14cSRichard Henderson {
8885ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8895ee5c14cSRichard Henderson     intptr_t i;
8905ee5c14cSRichard Henderson 
8915ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
8925ee5c14cSRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
8935ee5c14cSRichard Henderson         *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
8945ee5c14cSRichard Henderson     }
8955ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8965ee5c14cSRichard Henderson }
8975ee5c14cSRichard Henderson 
8985ee5c14cSRichard Henderson void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
8995ee5c14cSRichard Henderson {
9005ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
9015ee5c14cSRichard Henderson     intptr_t i;
9025ee5c14cSRichard Henderson 
903899f08adSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
9045ee5c14cSRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
9055ee5c14cSRichard Henderson         *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
9065ee5c14cSRichard Henderson     }
9075ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
9085ee5c14cSRichard Henderson }
9095ee5c14cSRichard Henderson 
9105ee5c14cSRichard Henderson void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
9115ee5c14cSRichard Henderson {
9125ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
9135ee5c14cSRichard Henderson     intptr_t i;
9145ee5c14cSRichard Henderson 
915899f08adSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
9165ee5c14cSRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
9175ee5c14cSRichard Henderson         *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
9185ee5c14cSRichard Henderson     }
9195ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
9205ee5c14cSRichard Henderson }
9215ee5c14cSRichard Henderson 
9225d0ceda9SRichard Henderson void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
9235d0ceda9SRichard Henderson {
9245d0ceda9SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
9255d0ceda9SRichard Henderson     intptr_t i;
9265d0ceda9SRichard Henderson 
9275d0ceda9SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
9285d0ceda9SRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
9295d0ceda9SRichard Henderson         *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
9305d0ceda9SRichard Henderson     }
9315d0ceda9SRichard Henderson     clear_high(d, oprsz, desc);
9325d0ceda9SRichard Henderson }
9335d0ceda9SRichard Henderson 
9345d0ceda9SRichard Henderson void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
9355d0ceda9SRichard Henderson {
9365d0ceda9SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
9375d0ceda9SRichard Henderson     intptr_t i;
9385d0ceda9SRichard Henderson 
9395d0ceda9SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
9405d0ceda9SRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
9415d0ceda9SRichard Henderson         *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
9425d0ceda9SRichard Henderson     }
9435d0ceda9SRichard Henderson     clear_high(d, oprsz, desc);
9445d0ceda9SRichard Henderson }
9455d0ceda9SRichard Henderson 
9465d0ceda9SRichard Henderson void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
9475d0ceda9SRichard Henderson {
9485d0ceda9SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
9495d0ceda9SRichard Henderson     intptr_t i;
9505d0ceda9SRichard Henderson 
9515d0ceda9SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
9525d0ceda9SRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
9535d0ceda9SRichard Henderson         *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
9545d0ceda9SRichard Henderson     }
9555d0ceda9SRichard Henderson     clear_high(d, oprsz, desc);
9565d0ceda9SRichard Henderson }
9575d0ceda9SRichard Henderson 
9585d0ceda9SRichard Henderson void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
9595d0ceda9SRichard Henderson {
9605d0ceda9SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
9615d0ceda9SRichard Henderson     intptr_t i;
9625d0ceda9SRichard Henderson 
9635d0ceda9SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
9645d0ceda9SRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
9655d0ceda9SRichard Henderson         *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
9665d0ceda9SRichard Henderson     }
9675d0ceda9SRichard Henderson     clear_high(d, oprsz, desc);
9685d0ceda9SRichard Henderson }
9695d0ceda9SRichard Henderson 
9705d0ceda9SRichard Henderson void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
9715d0ceda9SRichard Henderson {
9725d0ceda9SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
9735d0ceda9SRichard Henderson     intptr_t i;
9745d0ceda9SRichard Henderson 
9755d0ceda9SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
9765d0ceda9SRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
9775d0ceda9SRichard Henderson         *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
9785d0ceda9SRichard Henderson     }
9795d0ceda9SRichard Henderson     clear_high(d, oprsz, desc);
9805d0ceda9SRichard Henderson }
9815d0ceda9SRichard Henderson 
9825d0ceda9SRichard Henderson void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
9835d0ceda9SRichard Henderson {
9845d0ceda9SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
9855d0ceda9SRichard Henderson     intptr_t i;
9865d0ceda9SRichard Henderson 
9875d0ceda9SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
9885d0ceda9SRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
9895d0ceda9SRichard Henderson         *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
9905d0ceda9SRichard Henderson     }
9915d0ceda9SRichard Henderson     clear_high(d, oprsz, desc);
9925d0ceda9SRichard Henderson }
9935d0ceda9SRichard Henderson 
9945d0ceda9SRichard Henderson void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
9955d0ceda9SRichard Henderson {
9965d0ceda9SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
9975d0ceda9SRichard Henderson     intptr_t i;
9985d0ceda9SRichard Henderson 
9995d0ceda9SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
10005d0ceda9SRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
10015d0ceda9SRichard Henderson         *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
10025d0ceda9SRichard Henderson     }
10035d0ceda9SRichard Henderson     clear_high(d, oprsz, desc);
10045d0ceda9SRichard Henderson }
10055d0ceda9SRichard Henderson 
10065d0ceda9SRichard Henderson void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
10075d0ceda9SRichard Henderson {
10085d0ceda9SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
10095d0ceda9SRichard Henderson     intptr_t i;
10105d0ceda9SRichard Henderson 
10115d0ceda9SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
10125d0ceda9SRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
10135d0ceda9SRichard Henderson         *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
10145d0ceda9SRichard Henderson     }
10155d0ceda9SRichard Henderson     clear_high(d, oprsz, desc);
10165d0ceda9SRichard Henderson }
10175d0ceda9SRichard Henderson 
1018212be173SRichard Henderson #define DO_CMP1(NAME, TYPE, OP)                                            \
1019212be173SRichard Henderson void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
1020212be173SRichard Henderson {                                                                          \
1021212be173SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);                                     \
1022212be173SRichard Henderson     intptr_t i;                                                            \
10236cb1d3b8SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(TYPE)) {                            \
10240270bd50SRichard Henderson         *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i));        \
1025212be173SRichard Henderson     }                                                                      \
1026212be173SRichard Henderson     clear_high(d, oprsz, desc);                                            \
1027212be173SRichard Henderson }
1028212be173SRichard Henderson 
1029212be173SRichard Henderson #define DO_CMP2(SZ) \
10306c7ab301SRichard Henderson     DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==)    \
10316c7ab301SRichard Henderson     DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=)    \
10326c7ab301SRichard Henderson     DO_CMP1(gvec_lt##SZ, int##SZ##_t, <)      \
10336c7ab301SRichard Henderson     DO_CMP1(gvec_le##SZ, int##SZ##_t, <=)     \
10346c7ab301SRichard Henderson     DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <)    \
10356c7ab301SRichard Henderson     DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
1036212be173SRichard Henderson 
1037212be173SRichard Henderson DO_CMP2(8)
1038212be173SRichard Henderson DO_CMP2(16)
1039212be173SRichard Henderson DO_CMP2(32)
1040212be173SRichard Henderson DO_CMP2(64)
1041212be173SRichard Henderson 
1042212be173SRichard Henderson #undef DO_CMP1
1043212be173SRichard Henderson #undef DO_CMP2
1044f49b12c6SRichard Henderson 
1045f49b12c6SRichard Henderson void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
1046f49b12c6SRichard Henderson {
1047f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1048f49b12c6SRichard Henderson     intptr_t i;
1049f49b12c6SRichard Henderson 
1050f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1051f49b12c6SRichard Henderson         int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
1052f49b12c6SRichard Henderson         if (r > INT8_MAX) {
1053f49b12c6SRichard Henderson             r = INT8_MAX;
1054f49b12c6SRichard Henderson         } else if (r < INT8_MIN) {
1055f49b12c6SRichard Henderson             r = INT8_MIN;
1056f49b12c6SRichard Henderson         }
1057f49b12c6SRichard Henderson         *(int8_t *)(d + i) = r;
1058f49b12c6SRichard Henderson     }
1059f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1060f49b12c6SRichard Henderson }
1061f49b12c6SRichard Henderson 
1062f49b12c6SRichard Henderson void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
1063f49b12c6SRichard Henderson {
1064f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1065f49b12c6SRichard Henderson     intptr_t i;
1066f49b12c6SRichard Henderson 
1067f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1068f49b12c6SRichard Henderson         int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
1069f49b12c6SRichard Henderson         if (r > INT16_MAX) {
1070f49b12c6SRichard Henderson             r = INT16_MAX;
1071f49b12c6SRichard Henderson         } else if (r < INT16_MIN) {
1072f49b12c6SRichard Henderson             r = INT16_MIN;
1073f49b12c6SRichard Henderson         }
1074f49b12c6SRichard Henderson         *(int16_t *)(d + i) = r;
1075f49b12c6SRichard Henderson     }
1076f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1077f49b12c6SRichard Henderson }
1078f49b12c6SRichard Henderson 
1079f49b12c6SRichard Henderson void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
1080f49b12c6SRichard Henderson {
1081f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1082f49b12c6SRichard Henderson     intptr_t i;
1083f49b12c6SRichard Henderson 
1084f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1085f49b12c6SRichard Henderson         int32_t ai = *(int32_t *)(a + i);
1086f49b12c6SRichard Henderson         int32_t bi = *(int32_t *)(b + i);
10877702a855SRichard Henderson         int32_t di;
10887702a855SRichard Henderson         if (sadd32_overflow(ai, bi, &di)) {
1089f49b12c6SRichard Henderson             di = (di < 0 ? INT32_MAX : INT32_MIN);
1090f49b12c6SRichard Henderson         }
1091f49b12c6SRichard Henderson         *(int32_t *)(d + i) = di;
1092f49b12c6SRichard Henderson     }
1093f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1094f49b12c6SRichard Henderson }
1095f49b12c6SRichard Henderson 
1096f49b12c6SRichard Henderson void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
1097f49b12c6SRichard Henderson {
1098f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1099f49b12c6SRichard Henderson     intptr_t i;
1100f49b12c6SRichard Henderson 
1101f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1102f49b12c6SRichard Henderson         int64_t ai = *(int64_t *)(a + i);
1103f49b12c6SRichard Henderson         int64_t bi = *(int64_t *)(b + i);
11047702a855SRichard Henderson         int64_t di;
11057702a855SRichard Henderson         if (sadd64_overflow(ai, bi, &di)) {
1106f49b12c6SRichard Henderson             di = (di < 0 ? INT64_MAX : INT64_MIN);
1107f49b12c6SRichard Henderson         }
1108f49b12c6SRichard Henderson         *(int64_t *)(d + i) = di;
1109f49b12c6SRichard Henderson     }
1110f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1111f49b12c6SRichard Henderson }
1112f49b12c6SRichard Henderson 
1113f49b12c6SRichard Henderson void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
1114f49b12c6SRichard Henderson {
1115f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1116f49b12c6SRichard Henderson     intptr_t i;
1117f49b12c6SRichard Henderson 
1118f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1119f49b12c6SRichard Henderson         int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
1120f49b12c6SRichard Henderson         if (r > INT8_MAX) {
1121f49b12c6SRichard Henderson             r = INT8_MAX;
1122f49b12c6SRichard Henderson         } else if (r < INT8_MIN) {
1123f49b12c6SRichard Henderson             r = INT8_MIN;
1124f49b12c6SRichard Henderson         }
1125f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
1126f49b12c6SRichard Henderson     }
1127f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1128f49b12c6SRichard Henderson }
1129f49b12c6SRichard Henderson 
1130f49b12c6SRichard Henderson void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
1131f49b12c6SRichard Henderson {
1132f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1133f49b12c6SRichard Henderson     intptr_t i;
1134f49b12c6SRichard Henderson 
1135f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1136f49b12c6SRichard Henderson         int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
1137f49b12c6SRichard Henderson         if (r > INT16_MAX) {
1138f49b12c6SRichard Henderson             r = INT16_MAX;
1139f49b12c6SRichard Henderson         } else if (r < INT16_MIN) {
1140f49b12c6SRichard Henderson             r = INT16_MIN;
1141f49b12c6SRichard Henderson         }
1142f49b12c6SRichard Henderson         *(int16_t *)(d + i) = r;
1143f49b12c6SRichard Henderson     }
1144f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1145f49b12c6SRichard Henderson }
1146f49b12c6SRichard Henderson 
1147f49b12c6SRichard Henderson void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
1148f49b12c6SRichard Henderson {
1149f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1150f49b12c6SRichard Henderson     intptr_t i;
1151f49b12c6SRichard Henderson 
1152f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1153f49b12c6SRichard Henderson         int32_t ai = *(int32_t *)(a + i);
1154f49b12c6SRichard Henderson         int32_t bi = *(int32_t *)(b + i);
11557702a855SRichard Henderson         int32_t di;
11567702a855SRichard Henderson         if (ssub32_overflow(ai, bi, &di)) {
1157f49b12c6SRichard Henderson             di = (di < 0 ? INT32_MAX : INT32_MIN);
1158f49b12c6SRichard Henderson         }
1159f49b12c6SRichard Henderson         *(int32_t *)(d + i) = di;
1160f49b12c6SRichard Henderson     }
1161f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1162f49b12c6SRichard Henderson }
1163f49b12c6SRichard Henderson 
1164f49b12c6SRichard Henderson void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
1165f49b12c6SRichard Henderson {
1166f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1167f49b12c6SRichard Henderson     intptr_t i;
1168f49b12c6SRichard Henderson 
1169f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1170f49b12c6SRichard Henderson         int64_t ai = *(int64_t *)(a + i);
1171f49b12c6SRichard Henderson         int64_t bi = *(int64_t *)(b + i);
11727702a855SRichard Henderson         int64_t di;
11737702a855SRichard Henderson         if (ssub64_overflow(ai, bi, &di)) {
1174f49b12c6SRichard Henderson             di = (di < 0 ? INT64_MAX : INT64_MIN);
1175f49b12c6SRichard Henderson         }
1176f49b12c6SRichard Henderson         *(int64_t *)(d + i) = di;
1177f49b12c6SRichard Henderson     }
1178f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1179f49b12c6SRichard Henderson }
1180f49b12c6SRichard Henderson 
1181f49b12c6SRichard Henderson void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
1182f49b12c6SRichard Henderson {
1183f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1184f49b12c6SRichard Henderson     intptr_t i;
1185f49b12c6SRichard Henderson 
1186f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1187f49b12c6SRichard Henderson         unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
1188f49b12c6SRichard Henderson         if (r > UINT8_MAX) {
1189f49b12c6SRichard Henderson             r = UINT8_MAX;
1190f49b12c6SRichard Henderson         }
1191f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
1192f49b12c6SRichard Henderson     }
1193f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1194f49b12c6SRichard Henderson }
1195f49b12c6SRichard Henderson 
1196f49b12c6SRichard Henderson void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
1197f49b12c6SRichard Henderson {
1198f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1199f49b12c6SRichard Henderson     intptr_t i;
1200f49b12c6SRichard Henderson 
1201f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1202f49b12c6SRichard Henderson         unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
1203f49b12c6SRichard Henderson         if (r > UINT16_MAX) {
1204f49b12c6SRichard Henderson             r = UINT16_MAX;
1205f49b12c6SRichard Henderson         }
1206f49b12c6SRichard Henderson         *(uint16_t *)(d + i) = r;
1207f49b12c6SRichard Henderson     }
1208f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1209f49b12c6SRichard Henderson }
1210f49b12c6SRichard Henderson 
1211f49b12c6SRichard Henderson void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
1212f49b12c6SRichard Henderson {
1213f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1214f49b12c6SRichard Henderson     intptr_t i;
1215f49b12c6SRichard Henderson 
1216f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1217f49b12c6SRichard Henderson         uint32_t ai = *(uint32_t *)(a + i);
1218f49b12c6SRichard Henderson         uint32_t bi = *(uint32_t *)(b + i);
12197702a855SRichard Henderson         uint32_t di;
12207702a855SRichard Henderson         if (uadd32_overflow(ai, bi, &di)) {
1221f49b12c6SRichard Henderson             di = UINT32_MAX;
1222f49b12c6SRichard Henderson         }
1223f49b12c6SRichard Henderson         *(uint32_t *)(d + i) = di;
1224f49b12c6SRichard Henderson     }
1225f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1226f49b12c6SRichard Henderson }
1227f49b12c6SRichard Henderson 
1228f49b12c6SRichard Henderson void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
1229f49b12c6SRichard Henderson {
1230f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1231f49b12c6SRichard Henderson     intptr_t i;
1232f49b12c6SRichard Henderson 
1233f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1234f49b12c6SRichard Henderson         uint64_t ai = *(uint64_t *)(a + i);
1235f49b12c6SRichard Henderson         uint64_t bi = *(uint64_t *)(b + i);
12367702a855SRichard Henderson         uint64_t di;
12377702a855SRichard Henderson         if (uadd64_overflow(ai, bi, &di)) {
1238f49b12c6SRichard Henderson             di = UINT64_MAX;
1239f49b12c6SRichard Henderson         }
1240f49b12c6SRichard Henderson         *(uint64_t *)(d + i) = di;
1241f49b12c6SRichard Henderson     }
1242f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1243f49b12c6SRichard Henderson }
1244f49b12c6SRichard Henderson 
1245f49b12c6SRichard Henderson void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
1246f49b12c6SRichard Henderson {
1247f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1248f49b12c6SRichard Henderson     intptr_t i;
1249f49b12c6SRichard Henderson 
1250f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1251f49b12c6SRichard Henderson         int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
1252f49b12c6SRichard Henderson         if (r < 0) {
1253f49b12c6SRichard Henderson             r = 0;
1254f49b12c6SRichard Henderson         }
1255f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
1256f49b12c6SRichard Henderson     }
1257f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1258f49b12c6SRichard Henderson }
1259f49b12c6SRichard Henderson 
1260f49b12c6SRichard Henderson void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
1261f49b12c6SRichard Henderson {
1262f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1263f49b12c6SRichard Henderson     intptr_t i;
1264f49b12c6SRichard Henderson 
1265f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1266f49b12c6SRichard Henderson         int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
1267f49b12c6SRichard Henderson         if (r < 0) {
1268f49b12c6SRichard Henderson             r = 0;
1269f49b12c6SRichard Henderson         }
1270f49b12c6SRichard Henderson         *(uint16_t *)(d + i) = r;
1271f49b12c6SRichard Henderson     }
1272f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1273f49b12c6SRichard Henderson }
1274f49b12c6SRichard Henderson 
1275f49b12c6SRichard Henderson void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
1276f49b12c6SRichard Henderson {
1277f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1278f49b12c6SRichard Henderson     intptr_t i;
1279f49b12c6SRichard Henderson 
1280f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1281f49b12c6SRichard Henderson         uint32_t ai = *(uint32_t *)(a + i);
1282f49b12c6SRichard Henderson         uint32_t bi = *(uint32_t *)(b + i);
12837702a855SRichard Henderson         uint32_t di;
12847702a855SRichard Henderson         if (usub32_overflow(ai, bi, &di)) {
1285f49b12c6SRichard Henderson             di = 0;
1286f49b12c6SRichard Henderson         }
1287f49b12c6SRichard Henderson         *(uint32_t *)(d + i) = di;
1288f49b12c6SRichard Henderson     }
1289f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1290f49b12c6SRichard Henderson }
1291f49b12c6SRichard Henderson 
1292f49b12c6SRichard Henderson void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1293f49b12c6SRichard Henderson {
1294f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1295f49b12c6SRichard Henderson     intptr_t i;
1296f49b12c6SRichard Henderson 
1297f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1298f49b12c6SRichard Henderson         uint64_t ai = *(uint64_t *)(a + i);
1299f49b12c6SRichard Henderson         uint64_t bi = *(uint64_t *)(b + i);
13007702a855SRichard Henderson         uint64_t di;
13017702a855SRichard Henderson         if (usub64_overflow(ai, bi, &di)) {
1302f49b12c6SRichard Henderson             di = 0;
1303f49b12c6SRichard Henderson         }
1304f49b12c6SRichard Henderson         *(uint64_t *)(d + i) = di;
1305f49b12c6SRichard Henderson     }
1306f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1307f49b12c6SRichard Henderson }
1308dd0a0fcdSRichard Henderson 
1309dd0a0fcdSRichard Henderson void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1310dd0a0fcdSRichard Henderson {
1311dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1312dd0a0fcdSRichard Henderson     intptr_t i;
1313dd0a0fcdSRichard Henderson 
1314dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1315dd0a0fcdSRichard Henderson         int8_t aa = *(int8_t *)(a + i);
1316dd0a0fcdSRichard Henderson         int8_t bb = *(int8_t *)(b + i);
1317dd0a0fcdSRichard Henderson         int8_t dd = aa < bb ? aa : bb;
1318dd0a0fcdSRichard Henderson         *(int8_t *)(d + i) = dd;
1319dd0a0fcdSRichard Henderson     }
1320dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1321dd0a0fcdSRichard Henderson }
1322dd0a0fcdSRichard Henderson 
1323dd0a0fcdSRichard Henderson void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1324dd0a0fcdSRichard Henderson {
1325dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1326dd0a0fcdSRichard Henderson     intptr_t i;
1327dd0a0fcdSRichard Henderson 
1328dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1329dd0a0fcdSRichard Henderson         int16_t aa = *(int16_t *)(a + i);
1330dd0a0fcdSRichard Henderson         int16_t bb = *(int16_t *)(b + i);
1331dd0a0fcdSRichard Henderson         int16_t dd = aa < bb ? aa : bb;
1332dd0a0fcdSRichard Henderson         *(int16_t *)(d + i) = dd;
1333dd0a0fcdSRichard Henderson     }
1334dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1335dd0a0fcdSRichard Henderson }
1336dd0a0fcdSRichard Henderson 
1337dd0a0fcdSRichard Henderson void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1338dd0a0fcdSRichard Henderson {
1339dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1340dd0a0fcdSRichard Henderson     intptr_t i;
1341dd0a0fcdSRichard Henderson 
1342dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1343dd0a0fcdSRichard Henderson         int32_t aa = *(int32_t *)(a + i);
1344dd0a0fcdSRichard Henderson         int32_t bb = *(int32_t *)(b + i);
1345dd0a0fcdSRichard Henderson         int32_t dd = aa < bb ? aa : bb;
1346dd0a0fcdSRichard Henderson         *(int32_t *)(d + i) = dd;
1347dd0a0fcdSRichard Henderson     }
1348dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1349dd0a0fcdSRichard Henderson }
1350dd0a0fcdSRichard Henderson 
1351dd0a0fcdSRichard Henderson void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1352dd0a0fcdSRichard Henderson {
1353dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1354dd0a0fcdSRichard Henderson     intptr_t i;
1355dd0a0fcdSRichard Henderson 
1356dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1357dd0a0fcdSRichard Henderson         int64_t aa = *(int64_t *)(a + i);
1358dd0a0fcdSRichard Henderson         int64_t bb = *(int64_t *)(b + i);
1359dd0a0fcdSRichard Henderson         int64_t dd = aa < bb ? aa : bb;
1360dd0a0fcdSRichard Henderson         *(int64_t *)(d + i) = dd;
1361dd0a0fcdSRichard Henderson     }
1362dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1363dd0a0fcdSRichard Henderson }
1364dd0a0fcdSRichard Henderson 
1365dd0a0fcdSRichard Henderson void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1366dd0a0fcdSRichard Henderson {
1367dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1368dd0a0fcdSRichard Henderson     intptr_t i;
1369dd0a0fcdSRichard Henderson 
1370dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1371dd0a0fcdSRichard Henderson         int8_t aa = *(int8_t *)(a + i);
1372dd0a0fcdSRichard Henderson         int8_t bb = *(int8_t *)(b + i);
1373dd0a0fcdSRichard Henderson         int8_t dd = aa > bb ? aa : bb;
1374dd0a0fcdSRichard Henderson         *(int8_t *)(d + i) = dd;
1375dd0a0fcdSRichard Henderson     }
1376dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1377dd0a0fcdSRichard Henderson }
1378dd0a0fcdSRichard Henderson 
1379dd0a0fcdSRichard Henderson void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1380dd0a0fcdSRichard Henderson {
1381dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1382dd0a0fcdSRichard Henderson     intptr_t i;
1383dd0a0fcdSRichard Henderson 
1384dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1385dd0a0fcdSRichard Henderson         int16_t aa = *(int16_t *)(a + i);
1386dd0a0fcdSRichard Henderson         int16_t bb = *(int16_t *)(b + i);
1387dd0a0fcdSRichard Henderson         int16_t dd = aa > bb ? aa : bb;
1388dd0a0fcdSRichard Henderson         *(int16_t *)(d + i) = dd;
1389dd0a0fcdSRichard Henderson     }
1390dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1391dd0a0fcdSRichard Henderson }
1392dd0a0fcdSRichard Henderson 
1393dd0a0fcdSRichard Henderson void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1394dd0a0fcdSRichard Henderson {
1395dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1396dd0a0fcdSRichard Henderson     intptr_t i;
1397dd0a0fcdSRichard Henderson 
1398dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1399dd0a0fcdSRichard Henderson         int32_t aa = *(int32_t *)(a + i);
1400dd0a0fcdSRichard Henderson         int32_t bb = *(int32_t *)(b + i);
1401dd0a0fcdSRichard Henderson         int32_t dd = aa > bb ? aa : bb;
1402dd0a0fcdSRichard Henderson         *(int32_t *)(d + i) = dd;
1403dd0a0fcdSRichard Henderson     }
1404dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1405dd0a0fcdSRichard Henderson }
1406dd0a0fcdSRichard Henderson 
1407dd0a0fcdSRichard Henderson void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1408dd0a0fcdSRichard Henderson {
1409dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1410dd0a0fcdSRichard Henderson     intptr_t i;
1411dd0a0fcdSRichard Henderson 
1412dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1413dd0a0fcdSRichard Henderson         int64_t aa = *(int64_t *)(a + i);
1414dd0a0fcdSRichard Henderson         int64_t bb = *(int64_t *)(b + i);
1415dd0a0fcdSRichard Henderson         int64_t dd = aa > bb ? aa : bb;
1416dd0a0fcdSRichard Henderson         *(int64_t *)(d + i) = dd;
1417dd0a0fcdSRichard Henderson     }
1418dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1419dd0a0fcdSRichard Henderson }
1420dd0a0fcdSRichard Henderson 
1421dd0a0fcdSRichard Henderson void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1422dd0a0fcdSRichard Henderson {
1423dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1424dd0a0fcdSRichard Henderson     intptr_t i;
1425dd0a0fcdSRichard Henderson 
1426dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1427dd0a0fcdSRichard Henderson         uint8_t aa = *(uint8_t *)(a + i);
1428dd0a0fcdSRichard Henderson         uint8_t bb = *(uint8_t *)(b + i);
1429dd0a0fcdSRichard Henderson         uint8_t dd = aa < bb ? aa : bb;
1430dd0a0fcdSRichard Henderson         *(uint8_t *)(d + i) = dd;
1431dd0a0fcdSRichard Henderson     }
1432dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1433dd0a0fcdSRichard Henderson }
1434dd0a0fcdSRichard Henderson 
1435dd0a0fcdSRichard Henderson void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1436dd0a0fcdSRichard Henderson {
1437dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1438dd0a0fcdSRichard Henderson     intptr_t i;
1439dd0a0fcdSRichard Henderson 
1440dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1441dd0a0fcdSRichard Henderson         uint16_t aa = *(uint16_t *)(a + i);
1442dd0a0fcdSRichard Henderson         uint16_t bb = *(uint16_t *)(b + i);
1443dd0a0fcdSRichard Henderson         uint16_t dd = aa < bb ? aa : bb;
1444dd0a0fcdSRichard Henderson         *(uint16_t *)(d + i) = dd;
1445dd0a0fcdSRichard Henderson     }
1446dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1447dd0a0fcdSRichard Henderson }
1448dd0a0fcdSRichard Henderson 
1449dd0a0fcdSRichard Henderson void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1450dd0a0fcdSRichard Henderson {
1451dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1452dd0a0fcdSRichard Henderson     intptr_t i;
1453dd0a0fcdSRichard Henderson 
1454dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1455dd0a0fcdSRichard Henderson         uint32_t aa = *(uint32_t *)(a + i);
1456dd0a0fcdSRichard Henderson         uint32_t bb = *(uint32_t *)(b + i);
1457dd0a0fcdSRichard Henderson         uint32_t dd = aa < bb ? aa : bb;
1458dd0a0fcdSRichard Henderson         *(uint32_t *)(d + i) = dd;
1459dd0a0fcdSRichard Henderson     }
1460dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1461dd0a0fcdSRichard Henderson }
1462dd0a0fcdSRichard Henderson 
1463dd0a0fcdSRichard Henderson void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1464dd0a0fcdSRichard Henderson {
1465dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1466dd0a0fcdSRichard Henderson     intptr_t i;
1467dd0a0fcdSRichard Henderson 
1468dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1469dd0a0fcdSRichard Henderson         uint64_t aa = *(uint64_t *)(a + i);
1470dd0a0fcdSRichard Henderson         uint64_t bb = *(uint64_t *)(b + i);
1471dd0a0fcdSRichard Henderson         uint64_t dd = aa < bb ? aa : bb;
1472dd0a0fcdSRichard Henderson         *(uint64_t *)(d + i) = dd;
1473dd0a0fcdSRichard Henderson     }
1474dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1475dd0a0fcdSRichard Henderson }
1476dd0a0fcdSRichard Henderson 
1477dd0a0fcdSRichard Henderson void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1478dd0a0fcdSRichard Henderson {
1479dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1480dd0a0fcdSRichard Henderson     intptr_t i;
1481dd0a0fcdSRichard Henderson 
1482dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1483dd0a0fcdSRichard Henderson         uint8_t aa = *(uint8_t *)(a + i);
1484dd0a0fcdSRichard Henderson         uint8_t bb = *(uint8_t *)(b + i);
1485dd0a0fcdSRichard Henderson         uint8_t dd = aa > bb ? aa : bb;
1486dd0a0fcdSRichard Henderson         *(uint8_t *)(d + i) = dd;
1487dd0a0fcdSRichard Henderson     }
1488dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1489dd0a0fcdSRichard Henderson }
1490dd0a0fcdSRichard Henderson 
1491dd0a0fcdSRichard Henderson void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1492dd0a0fcdSRichard Henderson {
1493dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1494dd0a0fcdSRichard Henderson     intptr_t i;
1495dd0a0fcdSRichard Henderson 
1496dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1497dd0a0fcdSRichard Henderson         uint16_t aa = *(uint16_t *)(a + i);
1498dd0a0fcdSRichard Henderson         uint16_t bb = *(uint16_t *)(b + i);
1499dd0a0fcdSRichard Henderson         uint16_t dd = aa > bb ? aa : bb;
1500dd0a0fcdSRichard Henderson         *(uint16_t *)(d + i) = dd;
1501dd0a0fcdSRichard Henderson     }
1502dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1503dd0a0fcdSRichard Henderson }
1504dd0a0fcdSRichard Henderson 
1505dd0a0fcdSRichard Henderson void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1506dd0a0fcdSRichard Henderson {
1507dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1508dd0a0fcdSRichard Henderson     intptr_t i;
1509dd0a0fcdSRichard Henderson 
1510dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1511dd0a0fcdSRichard Henderson         uint32_t aa = *(uint32_t *)(a + i);
1512dd0a0fcdSRichard Henderson         uint32_t bb = *(uint32_t *)(b + i);
1513dd0a0fcdSRichard Henderson         uint32_t dd = aa > bb ? aa : bb;
1514dd0a0fcdSRichard Henderson         *(uint32_t *)(d + i) = dd;
1515dd0a0fcdSRichard Henderson     }
1516dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1517dd0a0fcdSRichard Henderson }
1518dd0a0fcdSRichard Henderson 
1519dd0a0fcdSRichard Henderson void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1520dd0a0fcdSRichard Henderson {
1521dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1522dd0a0fcdSRichard Henderson     intptr_t i;
1523dd0a0fcdSRichard Henderson 
1524dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1525dd0a0fcdSRichard Henderson         uint64_t aa = *(uint64_t *)(a + i);
1526dd0a0fcdSRichard Henderson         uint64_t bb = *(uint64_t *)(b + i);
1527dd0a0fcdSRichard Henderson         uint64_t dd = aa > bb ? aa : bb;
1528dd0a0fcdSRichard Henderson         *(uint64_t *)(d + i) = dd;
1529dd0a0fcdSRichard Henderson     }
1530dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1531dd0a0fcdSRichard Henderson }
153238dc1294SRichard Henderson 
153338dc1294SRichard Henderson void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
153438dc1294SRichard Henderson {
153538dc1294SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
153638dc1294SRichard Henderson     intptr_t i;
153738dc1294SRichard Henderson 
15386c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
15396c7ab301SRichard Henderson         uint64_t aa = *(uint64_t *)(a + i);
15406c7ab301SRichard Henderson         uint64_t bb = *(uint64_t *)(b + i);
15416c7ab301SRichard Henderson         uint64_t cc = *(uint64_t *)(c + i);
15426c7ab301SRichard Henderson         *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
154338dc1294SRichard Henderson     }
154438dc1294SRichard Henderson     clear_high(d, oprsz, desc);
154538dc1294SRichard Henderson }
1546