xref: /qemu/accel/tcg/tcg-runtime-gvec.c (revision 5d0ceda902915e3f0e21c39d142c92c4e97c3ebb)
/*
 * Generic vectorized operation runtime
 *
 * Copyright (c) 2018 Linaro
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19db432672SRichard Henderson 
20db432672SRichard Henderson #include "qemu/osdep.h"
21db432672SRichard Henderson #include "qemu/host-utils.h"
22db432672SRichard Henderson #include "cpu.h"
23db432672SRichard Henderson #include "exec/helper-proto.h"
24dcb32f1dSPhilippe Mathieu-Daudé #include "tcg/tcg-gvec-desc.h"
25db432672SRichard Henderson 
26db432672SRichard Henderson 
/*
 * Zero the tail of the destination: when simd_maxsz(desc) exceeds @oprsz,
 * store 64-bit zeros at byte offsets oprsz, oprsz + 8, ... while the
 * offset is below simd_maxsz(desc).  Does nothing otherwise.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    const intptr_t total = simd_maxsz(desc);

    if (unlikely(total > oprsz)) {
        intptr_t off = oprsz;

        /* The guard above guarantees at least one store. */
        do {
            *(uint64_t *)(d + off) = 0;
            off += sizeof(uint64_t);
        } while (off < total);
    }
}
38db432672SRichard Henderson 
39db432672SRichard Henderson void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
40db432672SRichard Henderson {
41db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
42db432672SRichard Henderson     intptr_t i;
43db432672SRichard Henderson 
446c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
456c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
46db432672SRichard Henderson     }
47db432672SRichard Henderson     clear_high(d, oprsz, desc);
48db432672SRichard Henderson }
49db432672SRichard Henderson 
50db432672SRichard Henderson void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
51db432672SRichard Henderson {
52db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
53db432672SRichard Henderson     intptr_t i;
54db432672SRichard Henderson 
556c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
566c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
57db432672SRichard Henderson     }
58db432672SRichard Henderson     clear_high(d, oprsz, desc);
59db432672SRichard Henderson }
60db432672SRichard Henderson 
61db432672SRichard Henderson void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
62db432672SRichard Henderson {
63db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
64db432672SRichard Henderson     intptr_t i;
65db432672SRichard Henderson 
666c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
676c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
68db432672SRichard Henderson     }
69db432672SRichard Henderson     clear_high(d, oprsz, desc);
70db432672SRichard Henderson }
71db432672SRichard Henderson 
72db432672SRichard Henderson void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
73db432672SRichard Henderson {
74db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
75db432672SRichard Henderson     intptr_t i;
76db432672SRichard Henderson 
776c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
786c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
79db432672SRichard Henderson     }
80db432672SRichard Henderson     clear_high(d, oprsz, desc);
81db432672SRichard Henderson }
82db432672SRichard Henderson 
8322fc3527SRichard Henderson void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
8422fc3527SRichard Henderson {
8522fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8622fc3527SRichard Henderson     intptr_t i;
8722fc3527SRichard Henderson 
886c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
890a83e43aSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b;
9022fc3527SRichard Henderson     }
9122fc3527SRichard Henderson     clear_high(d, oprsz, desc);
9222fc3527SRichard Henderson }
9322fc3527SRichard Henderson 
9422fc3527SRichard Henderson void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
9522fc3527SRichard Henderson {
9622fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
9722fc3527SRichard Henderson     intptr_t i;
9822fc3527SRichard Henderson 
996c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1000a83e43aSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b;
10122fc3527SRichard Henderson     }
10222fc3527SRichard Henderson     clear_high(d, oprsz, desc);
10322fc3527SRichard Henderson }
10422fc3527SRichard Henderson 
10522fc3527SRichard Henderson void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
10622fc3527SRichard Henderson {
10722fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
10822fc3527SRichard Henderson     intptr_t i;
10922fc3527SRichard Henderson 
1106c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1110a83e43aSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b;
11222fc3527SRichard Henderson     }
11322fc3527SRichard Henderson     clear_high(d, oprsz, desc);
11422fc3527SRichard Henderson }
11522fc3527SRichard Henderson 
11622fc3527SRichard Henderson void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
11722fc3527SRichard Henderson {
11822fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
11922fc3527SRichard Henderson     intptr_t i;
12022fc3527SRichard Henderson 
1216c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1220a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b;
12322fc3527SRichard Henderson     }
12422fc3527SRichard Henderson     clear_high(d, oprsz, desc);
12522fc3527SRichard Henderson }
12622fc3527SRichard Henderson 
127db432672SRichard Henderson void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
128db432672SRichard Henderson {
129db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
130db432672SRichard Henderson     intptr_t i;
131db432672SRichard Henderson 
1326c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1336c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
134db432672SRichard Henderson     }
135db432672SRichard Henderson     clear_high(d, oprsz, desc);
136db432672SRichard Henderson }
137db432672SRichard Henderson 
138db432672SRichard Henderson void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
139db432672SRichard Henderson {
140db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
141db432672SRichard Henderson     intptr_t i;
142db432672SRichard Henderson 
1436c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1446c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
145db432672SRichard Henderson     }
146db432672SRichard Henderson     clear_high(d, oprsz, desc);
147db432672SRichard Henderson }
148db432672SRichard Henderson 
149db432672SRichard Henderson void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
150db432672SRichard Henderson {
151db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
152db432672SRichard Henderson     intptr_t i;
153db432672SRichard Henderson 
1546c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1556c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
156db432672SRichard Henderson     }
157db432672SRichard Henderson     clear_high(d, oprsz, desc);
158db432672SRichard Henderson }
159db432672SRichard Henderson 
160db432672SRichard Henderson void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
161db432672SRichard Henderson {
162db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
163db432672SRichard Henderson     intptr_t i;
164db432672SRichard Henderson 
1656c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1666c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
167db432672SRichard Henderson     }
168db432672SRichard Henderson     clear_high(d, oprsz, desc);
169db432672SRichard Henderson }
170db432672SRichard Henderson 
17122fc3527SRichard Henderson void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
17222fc3527SRichard Henderson {
17322fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
17422fc3527SRichard Henderson     intptr_t i;
17522fc3527SRichard Henderson 
1766c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1770a83e43aSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b;
17822fc3527SRichard Henderson     }
17922fc3527SRichard Henderson     clear_high(d, oprsz, desc);
18022fc3527SRichard Henderson }
18122fc3527SRichard Henderson 
18222fc3527SRichard Henderson void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
18322fc3527SRichard Henderson {
18422fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
18522fc3527SRichard Henderson     intptr_t i;
18622fc3527SRichard Henderson 
1876c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1880a83e43aSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b;
18922fc3527SRichard Henderson     }
19022fc3527SRichard Henderson     clear_high(d, oprsz, desc);
19122fc3527SRichard Henderson }
19222fc3527SRichard Henderson 
19322fc3527SRichard Henderson void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
19422fc3527SRichard Henderson {
19522fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
19622fc3527SRichard Henderson     intptr_t i;
19722fc3527SRichard Henderson 
1986c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1990a83e43aSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b;
20022fc3527SRichard Henderson     }
20122fc3527SRichard Henderson     clear_high(d, oprsz, desc);
20222fc3527SRichard Henderson }
20322fc3527SRichard Henderson 
20422fc3527SRichard Henderson void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
20522fc3527SRichard Henderson {
20622fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
20722fc3527SRichard Henderson     intptr_t i;
20822fc3527SRichard Henderson 
2096c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
2100a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b;
21122fc3527SRichard Henderson     }
21222fc3527SRichard Henderson     clear_high(d, oprsz, desc);
21322fc3527SRichard Henderson }
21422fc3527SRichard Henderson 
2153774030aSRichard Henderson void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
2163774030aSRichard Henderson {
2173774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2183774030aSRichard Henderson     intptr_t i;
2193774030aSRichard Henderson 
2206c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
2216c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
2223774030aSRichard Henderson     }
2233774030aSRichard Henderson     clear_high(d, oprsz, desc);
2243774030aSRichard Henderson }
2253774030aSRichard Henderson 
2263774030aSRichard Henderson void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
2273774030aSRichard Henderson {
2283774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2293774030aSRichard Henderson     intptr_t i;
2303774030aSRichard Henderson 
2316c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
2326c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
2333774030aSRichard Henderson     }
2343774030aSRichard Henderson     clear_high(d, oprsz, desc);
2353774030aSRichard Henderson }
2363774030aSRichard Henderson 
2373774030aSRichard Henderson void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
2383774030aSRichard Henderson {
2393774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2403774030aSRichard Henderson     intptr_t i;
2413774030aSRichard Henderson 
2426c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
2436c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
2443774030aSRichard Henderson     }
2453774030aSRichard Henderson     clear_high(d, oprsz, desc);
2463774030aSRichard Henderson }
2473774030aSRichard Henderson 
2483774030aSRichard Henderson void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
2493774030aSRichard Henderson {
2503774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2513774030aSRichard Henderson     intptr_t i;
2523774030aSRichard Henderson 
2536c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
2546c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
2553774030aSRichard Henderson     }
2563774030aSRichard Henderson     clear_high(d, oprsz, desc);
2573774030aSRichard Henderson }
2583774030aSRichard Henderson 
25922fc3527SRichard Henderson void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
26022fc3527SRichard Henderson {
26122fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
26222fc3527SRichard Henderson     intptr_t i;
26322fc3527SRichard Henderson 
2646c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
2650a83e43aSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b;
26622fc3527SRichard Henderson     }
26722fc3527SRichard Henderson     clear_high(d, oprsz, desc);
26822fc3527SRichard Henderson }
26922fc3527SRichard Henderson 
27022fc3527SRichard Henderson void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
27122fc3527SRichard Henderson {
27222fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
27322fc3527SRichard Henderson     intptr_t i;
27422fc3527SRichard Henderson 
2756c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
2760a83e43aSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b;
27722fc3527SRichard Henderson     }
27822fc3527SRichard Henderson     clear_high(d, oprsz, desc);
27922fc3527SRichard Henderson }
28022fc3527SRichard Henderson 
28122fc3527SRichard Henderson void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
28222fc3527SRichard Henderson {
28322fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
28422fc3527SRichard Henderson     intptr_t i;
28522fc3527SRichard Henderson 
2866c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
2870a83e43aSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b;
28822fc3527SRichard Henderson     }
28922fc3527SRichard Henderson     clear_high(d, oprsz, desc);
29022fc3527SRichard Henderson }
29122fc3527SRichard Henderson 
29222fc3527SRichard Henderson void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
29322fc3527SRichard Henderson {
29422fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
29522fc3527SRichard Henderson     intptr_t i;
29622fc3527SRichard Henderson 
2976c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
2980a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b;
29922fc3527SRichard Henderson     }
30022fc3527SRichard Henderson     clear_high(d, oprsz, desc);
30122fc3527SRichard Henderson }
30222fc3527SRichard Henderson 
303db432672SRichard Henderson void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
304db432672SRichard Henderson {
305db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
306db432672SRichard Henderson     intptr_t i;
307db432672SRichard Henderson 
3086c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
3096c7ab301SRichard Henderson         *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
310db432672SRichard Henderson     }
311db432672SRichard Henderson     clear_high(d, oprsz, desc);
312db432672SRichard Henderson }
313db432672SRichard Henderson 
314db432672SRichard Henderson void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
315db432672SRichard Henderson {
316db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
317db432672SRichard Henderson     intptr_t i;
318db432672SRichard Henderson 
3196c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
3206c7ab301SRichard Henderson         *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
321db432672SRichard Henderson     }
322db432672SRichard Henderson     clear_high(d, oprsz, desc);
323db432672SRichard Henderson }
324db432672SRichard Henderson 
325db432672SRichard Henderson void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
326db432672SRichard Henderson {
327db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
328db432672SRichard Henderson     intptr_t i;
329db432672SRichard Henderson 
3306c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
3316c7ab301SRichard Henderson         *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
332db432672SRichard Henderson     }
333db432672SRichard Henderson     clear_high(d, oprsz, desc);
334db432672SRichard Henderson }
335db432672SRichard Henderson 
336db432672SRichard Henderson void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
337db432672SRichard Henderson {
338db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
339db432672SRichard Henderson     intptr_t i;
340db432672SRichard Henderson 
3416c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
3426c7ab301SRichard Henderson         *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
343db432672SRichard Henderson     }
344db432672SRichard Henderson     clear_high(d, oprsz, desc);
345db432672SRichard Henderson }
346db432672SRichard Henderson 
347bcefc902SRichard Henderson void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
348bcefc902SRichard Henderson {
349bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
350bcefc902SRichard Henderson     intptr_t i;
351bcefc902SRichard Henderson 
352bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
353bcefc902SRichard Henderson         int8_t aa = *(int8_t *)(a + i);
354bcefc902SRichard Henderson         *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
355bcefc902SRichard Henderson     }
356bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
357bcefc902SRichard Henderson }
358bcefc902SRichard Henderson 
359bcefc902SRichard Henderson void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
360bcefc902SRichard Henderson {
361bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
362bcefc902SRichard Henderson     intptr_t i;
363bcefc902SRichard Henderson 
364bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
365bcefc902SRichard Henderson         int16_t aa = *(int16_t *)(a + i);
366bcefc902SRichard Henderson         *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
367bcefc902SRichard Henderson     }
368bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
369bcefc902SRichard Henderson }
370bcefc902SRichard Henderson 
371bcefc902SRichard Henderson void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
372bcefc902SRichard Henderson {
373bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
374bcefc902SRichard Henderson     intptr_t i;
375bcefc902SRichard Henderson 
376bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
377bcefc902SRichard Henderson         int32_t aa = *(int32_t *)(a + i);
378bcefc902SRichard Henderson         *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
379bcefc902SRichard Henderson     }
380bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
381bcefc902SRichard Henderson }
382bcefc902SRichard Henderson 
383bcefc902SRichard Henderson void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
384bcefc902SRichard Henderson {
385bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
386bcefc902SRichard Henderson     intptr_t i;
387bcefc902SRichard Henderson 
388bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
389bcefc902SRichard Henderson         int64_t aa = *(int64_t *)(a + i);
390bcefc902SRichard Henderson         *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
391bcefc902SRichard Henderson     }
392bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
393bcefc902SRichard Henderson }
394bcefc902SRichard Henderson 
395db432672SRichard Henderson void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
396db432672SRichard Henderson {
397db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
398db432672SRichard Henderson 
399db432672SRichard Henderson     memcpy(d, a, oprsz);
400db432672SRichard Henderson     clear_high(d, oprsz, desc);
401db432672SRichard Henderson }
402db432672SRichard Henderson 
403db432672SRichard Henderson void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
404db432672SRichard Henderson {
405db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
406db432672SRichard Henderson     intptr_t i;
407db432672SRichard Henderson 
408db432672SRichard Henderson     if (c == 0) {
409db432672SRichard Henderson         oprsz = 0;
410db432672SRichard Henderson     } else {
411db432672SRichard Henderson         for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
412db432672SRichard Henderson             *(uint64_t *)(d + i) = c;
413db432672SRichard Henderson         }
414db432672SRichard Henderson     }
415db432672SRichard Henderson     clear_high(d, oprsz, desc);
416db432672SRichard Henderson }
417db432672SRichard Henderson 
418db432672SRichard Henderson void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
419db432672SRichard Henderson {
420db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
421db432672SRichard Henderson     intptr_t i;
422db432672SRichard Henderson 
423db432672SRichard Henderson     if (c == 0) {
424db432672SRichard Henderson         oprsz = 0;
425db432672SRichard Henderson     } else {
426db432672SRichard Henderson         for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
427db432672SRichard Henderson             *(uint32_t *)(d + i) = c;
428db432672SRichard Henderson         }
429db432672SRichard Henderson     }
430db432672SRichard Henderson     clear_high(d, oprsz, desc);
431db432672SRichard Henderson }
432db432672SRichard Henderson 
433db432672SRichard Henderson void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
434db432672SRichard Henderson {
435db432672SRichard Henderson     HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
436db432672SRichard Henderson }
437db432672SRichard Henderson 
438db432672SRichard Henderson void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
439db432672SRichard Henderson {
440db432672SRichard Henderson     HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
441db432672SRichard Henderson }
442db432672SRichard Henderson 
443db432672SRichard Henderson void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
444db432672SRichard Henderson {
445db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
446db432672SRichard Henderson     intptr_t i;
447db432672SRichard Henderson 
4486c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4496c7ab301SRichard Henderson         *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
450db432672SRichard Henderson     }
451db432672SRichard Henderson     clear_high(d, oprsz, desc);
452db432672SRichard Henderson }
453db432672SRichard Henderson 
454db432672SRichard Henderson void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
455db432672SRichard Henderson {
456db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
457db432672SRichard Henderson     intptr_t i;
458db432672SRichard Henderson 
4596c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4606c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
461db432672SRichard Henderson     }
462db432672SRichard Henderson     clear_high(d, oprsz, desc);
463db432672SRichard Henderson }
464db432672SRichard Henderson 
465db432672SRichard Henderson void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
466db432672SRichard Henderson {
467db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
468db432672SRichard Henderson     intptr_t i;
469db432672SRichard Henderson 
4706c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4716c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
472db432672SRichard Henderson     }
473db432672SRichard Henderson     clear_high(d, oprsz, desc);
474db432672SRichard Henderson }
475db432672SRichard Henderson 
476db432672SRichard Henderson void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
477db432672SRichard Henderson {
478db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
479db432672SRichard Henderson     intptr_t i;
480db432672SRichard Henderson 
4816c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4826c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
483db432672SRichard Henderson     }
484db432672SRichard Henderson     clear_high(d, oprsz, desc);
485db432672SRichard Henderson }
486db432672SRichard Henderson 
487db432672SRichard Henderson void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
488db432672SRichard Henderson {
489db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
490db432672SRichard Henderson     intptr_t i;
491db432672SRichard Henderson 
4926c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4936c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
494db432672SRichard Henderson     }
495db432672SRichard Henderson     clear_high(d, oprsz, desc);
496db432672SRichard Henderson }
497db432672SRichard Henderson 
498db432672SRichard Henderson void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
499db432672SRichard Henderson {
500db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
501db432672SRichard Henderson     intptr_t i;
502db432672SRichard Henderson 
5036c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5046c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
505db432672SRichard Henderson     }
506db432672SRichard Henderson     clear_high(d, oprsz, desc);
507db432672SRichard Henderson }
508d0ec9796SRichard Henderson 
509f550805dSRichard Henderson void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
510f550805dSRichard Henderson {
511f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
512f550805dSRichard Henderson     intptr_t i;
513f550805dSRichard Henderson 
5146c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5156c7ab301SRichard Henderson         *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
516f550805dSRichard Henderson     }
517f550805dSRichard Henderson     clear_high(d, oprsz, desc);
518f550805dSRichard Henderson }
519f550805dSRichard Henderson 
520f550805dSRichard Henderson void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
521f550805dSRichard Henderson {
522f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
523f550805dSRichard Henderson     intptr_t i;
524f550805dSRichard Henderson 
5256c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5266c7ab301SRichard Henderson         *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
527f550805dSRichard Henderson     }
528f550805dSRichard Henderson     clear_high(d, oprsz, desc);
529f550805dSRichard Henderson }
530f550805dSRichard Henderson 
531f550805dSRichard Henderson void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
532f550805dSRichard Henderson {
533f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
534f550805dSRichard Henderson     intptr_t i;
535f550805dSRichard Henderson 
5366c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5376c7ab301SRichard Henderson         *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
538f550805dSRichard Henderson     }
539f550805dSRichard Henderson     clear_high(d, oprsz, desc);
540f550805dSRichard Henderson }
541f550805dSRichard Henderson 
54222fc3527SRichard Henderson void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
54322fc3527SRichard Henderson {
54422fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
54522fc3527SRichard Henderson     intptr_t i;
54622fc3527SRichard Henderson 
5476c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5480a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b;
54922fc3527SRichard Henderson     }
55022fc3527SRichard Henderson     clear_high(d, oprsz, desc);
55122fc3527SRichard Henderson }
55222fc3527SRichard Henderson 
55322fc3527SRichard Henderson void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
55422fc3527SRichard Henderson {
55522fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
55622fc3527SRichard Henderson     intptr_t i;
55722fc3527SRichard Henderson 
5586c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5590a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b;
56022fc3527SRichard Henderson     }
56122fc3527SRichard Henderson     clear_high(d, oprsz, desc);
56222fc3527SRichard Henderson }
56322fc3527SRichard Henderson 
56422fc3527SRichard Henderson void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
56522fc3527SRichard Henderson {
56622fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
56722fc3527SRichard Henderson     intptr_t i;
56822fc3527SRichard Henderson 
5696c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5700a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b;
57122fc3527SRichard Henderson     }
57222fc3527SRichard Henderson     clear_high(d, oprsz, desc);
57322fc3527SRichard Henderson }
57422fc3527SRichard Henderson 
575d0ec9796SRichard Henderson void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
576d0ec9796SRichard Henderson {
577d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
578d0ec9796SRichard Henderson     int shift = simd_data(desc);
579d0ec9796SRichard Henderson     intptr_t i;
580d0ec9796SRichard Henderson 
5816c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
5826c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
583d0ec9796SRichard Henderson     }
584d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
585d0ec9796SRichard Henderson }
586d0ec9796SRichard Henderson 
587d0ec9796SRichard Henderson void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
588d0ec9796SRichard Henderson {
589d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
590d0ec9796SRichard Henderson     int shift = simd_data(desc);
591d0ec9796SRichard Henderson     intptr_t i;
592d0ec9796SRichard Henderson 
5936c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
5946c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
595d0ec9796SRichard Henderson     }
596d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
597d0ec9796SRichard Henderson }
598d0ec9796SRichard Henderson 
599d0ec9796SRichard Henderson void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
600d0ec9796SRichard Henderson {
601d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
602d0ec9796SRichard Henderson     int shift = simd_data(desc);
603d0ec9796SRichard Henderson     intptr_t i;
604d0ec9796SRichard Henderson 
6056c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
6066c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
607d0ec9796SRichard Henderson     }
608d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
609d0ec9796SRichard Henderson }
610d0ec9796SRichard Henderson 
611d0ec9796SRichard Henderson void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
612d0ec9796SRichard Henderson {
613d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
614d0ec9796SRichard Henderson     int shift = simd_data(desc);
615d0ec9796SRichard Henderson     intptr_t i;
616d0ec9796SRichard Henderson 
6176c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
6186c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
619d0ec9796SRichard Henderson     }
620d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
621d0ec9796SRichard Henderson }
622d0ec9796SRichard Henderson 
623d0ec9796SRichard Henderson void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
624d0ec9796SRichard Henderson {
625d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
626d0ec9796SRichard Henderson     int shift = simd_data(desc);
627d0ec9796SRichard Henderson     intptr_t i;
628d0ec9796SRichard Henderson 
6296c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
6306c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
631d0ec9796SRichard Henderson     }
632d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
633d0ec9796SRichard Henderson }
634d0ec9796SRichard Henderson 
635d0ec9796SRichard Henderson void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
636d0ec9796SRichard Henderson {
637d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
638d0ec9796SRichard Henderson     int shift = simd_data(desc);
639d0ec9796SRichard Henderson     intptr_t i;
640d0ec9796SRichard Henderson 
6416c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
6426c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
643d0ec9796SRichard Henderson     }
644d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
645d0ec9796SRichard Henderson }
646d0ec9796SRichard Henderson 
647d0ec9796SRichard Henderson void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
648d0ec9796SRichard Henderson {
649d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
650d0ec9796SRichard Henderson     int shift = simd_data(desc);
651d0ec9796SRichard Henderson     intptr_t i;
652d0ec9796SRichard Henderson 
6536c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
6546c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
655d0ec9796SRichard Henderson     }
656d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
657d0ec9796SRichard Henderson }
658d0ec9796SRichard Henderson 
659d0ec9796SRichard Henderson void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
660d0ec9796SRichard Henderson {
661d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
662d0ec9796SRichard Henderson     int shift = simd_data(desc);
663d0ec9796SRichard Henderson     intptr_t i;
664d0ec9796SRichard Henderson 
6656c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
6666c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
667d0ec9796SRichard Henderson     }
668d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
669d0ec9796SRichard Henderson }
670d0ec9796SRichard Henderson 
671d0ec9796SRichard Henderson void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
672d0ec9796SRichard Henderson {
673d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
674d0ec9796SRichard Henderson     int shift = simd_data(desc);
675d0ec9796SRichard Henderson     intptr_t i;
676d0ec9796SRichard Henderson 
6776c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
6786c7ab301SRichard Henderson         *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
679d0ec9796SRichard Henderson     }
680d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
681d0ec9796SRichard Henderson }
682d0ec9796SRichard Henderson 
683d0ec9796SRichard Henderson void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
684d0ec9796SRichard Henderson {
685d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
686d0ec9796SRichard Henderson     int shift = simd_data(desc);
687d0ec9796SRichard Henderson     intptr_t i;
688d0ec9796SRichard Henderson 
6896c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
6906c7ab301SRichard Henderson         *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
691d0ec9796SRichard Henderson     }
692d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
693d0ec9796SRichard Henderson }
694d0ec9796SRichard Henderson 
695d0ec9796SRichard Henderson void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
696d0ec9796SRichard Henderson {
697d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
698d0ec9796SRichard Henderson     int shift = simd_data(desc);
699d0ec9796SRichard Henderson     intptr_t i;
700d0ec9796SRichard Henderson 
7016c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
7026c7ab301SRichard Henderson         *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
703d0ec9796SRichard Henderson     }
704d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
705d0ec9796SRichard Henderson }
706d0ec9796SRichard Henderson 
707d0ec9796SRichard Henderson void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
708d0ec9796SRichard Henderson {
709d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
710d0ec9796SRichard Henderson     int shift = simd_data(desc);
711d0ec9796SRichard Henderson     intptr_t i;
712d0ec9796SRichard Henderson 
7136c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
7146c7ab301SRichard Henderson         *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
715d0ec9796SRichard Henderson     }
716d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
717d0ec9796SRichard Henderson }
718212be173SRichard Henderson 
719b0f7e744SRichard Henderson void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
720b0f7e744SRichard Henderson {
721b0f7e744SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
722b0f7e744SRichard Henderson     int shift = simd_data(desc);
723b0f7e744SRichard Henderson     intptr_t i;
724b0f7e744SRichard Henderson 
725b0f7e744SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
726b0f7e744SRichard Henderson         *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
727b0f7e744SRichard Henderson     }
728b0f7e744SRichard Henderson     clear_high(d, oprsz, desc);
729b0f7e744SRichard Henderson }
730b0f7e744SRichard Henderson 
731b0f7e744SRichard Henderson void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
732b0f7e744SRichard Henderson {
733b0f7e744SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
734b0f7e744SRichard Henderson     int shift = simd_data(desc);
735b0f7e744SRichard Henderson     intptr_t i;
736b0f7e744SRichard Henderson 
737b0f7e744SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
738b0f7e744SRichard Henderson         *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
739b0f7e744SRichard Henderson     }
740b0f7e744SRichard Henderson     clear_high(d, oprsz, desc);
741b0f7e744SRichard Henderson }
742b0f7e744SRichard Henderson 
743b0f7e744SRichard Henderson void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
744b0f7e744SRichard Henderson {
745b0f7e744SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
746b0f7e744SRichard Henderson     int shift = simd_data(desc);
747b0f7e744SRichard Henderson     intptr_t i;
748b0f7e744SRichard Henderson 
749b0f7e744SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
750b0f7e744SRichard Henderson         *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
751b0f7e744SRichard Henderson     }
752b0f7e744SRichard Henderson     clear_high(d, oprsz, desc);
753b0f7e744SRichard Henderson }
754b0f7e744SRichard Henderson 
755b0f7e744SRichard Henderson void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
756b0f7e744SRichard Henderson {
757b0f7e744SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
758b0f7e744SRichard Henderson     int shift = simd_data(desc);
759b0f7e744SRichard Henderson     intptr_t i;
760b0f7e744SRichard Henderson 
761b0f7e744SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
762b0f7e744SRichard Henderson         *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
763b0f7e744SRichard Henderson     }
764b0f7e744SRichard Henderson     clear_high(d, oprsz, desc);
765b0f7e744SRichard Henderson }
766b0f7e744SRichard Henderson 
7675ee5c14cSRichard Henderson void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
7685ee5c14cSRichard Henderson {
7695ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
7705ee5c14cSRichard Henderson     intptr_t i;
7715ee5c14cSRichard Henderson 
7725ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
7735ee5c14cSRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
7745ee5c14cSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
7755ee5c14cSRichard Henderson     }
7765ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
7775ee5c14cSRichard Henderson }
7785ee5c14cSRichard Henderson 
7795ee5c14cSRichard Henderson void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
7805ee5c14cSRichard Henderson {
7815ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
7825ee5c14cSRichard Henderson     intptr_t i;
7835ee5c14cSRichard Henderson 
7845ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
7855ee5c14cSRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
7865ee5c14cSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
7875ee5c14cSRichard Henderson     }
7885ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
7895ee5c14cSRichard Henderson }
7905ee5c14cSRichard Henderson 
7915ee5c14cSRichard Henderson void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
7925ee5c14cSRichard Henderson {
7935ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
7945ee5c14cSRichard Henderson     intptr_t i;
7955ee5c14cSRichard Henderson 
7965ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
7975ee5c14cSRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
7985ee5c14cSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
7995ee5c14cSRichard Henderson     }
8005ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8015ee5c14cSRichard Henderson }
8025ee5c14cSRichard Henderson 
8035ee5c14cSRichard Henderson void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
8045ee5c14cSRichard Henderson {
8055ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8065ee5c14cSRichard Henderson     intptr_t i;
8075ee5c14cSRichard Henderson 
8085ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
8095ee5c14cSRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
8105ee5c14cSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
8115ee5c14cSRichard Henderson     }
8125ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8135ee5c14cSRichard Henderson }
8145ee5c14cSRichard Henderson 
8155ee5c14cSRichard Henderson void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
8165ee5c14cSRichard Henderson {
8175ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8185ee5c14cSRichard Henderson     intptr_t i;
8195ee5c14cSRichard Henderson 
8205ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
8215ee5c14cSRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
8225ee5c14cSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
8235ee5c14cSRichard Henderson     }
8245ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8255ee5c14cSRichard Henderson }
8265ee5c14cSRichard Henderson 
8275ee5c14cSRichard Henderson void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
8285ee5c14cSRichard Henderson {
8295ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8305ee5c14cSRichard Henderson     intptr_t i;
8315ee5c14cSRichard Henderson 
8325ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
8335ee5c14cSRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
8345ee5c14cSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
8355ee5c14cSRichard Henderson     }
8365ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8375ee5c14cSRichard Henderson }
8385ee5c14cSRichard Henderson 
8395ee5c14cSRichard Henderson void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
8405ee5c14cSRichard Henderson {
8415ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8425ee5c14cSRichard Henderson     intptr_t i;
8435ee5c14cSRichard Henderson 
8445ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
8455ee5c14cSRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
8465ee5c14cSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
8475ee5c14cSRichard Henderson     }
8485ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8495ee5c14cSRichard Henderson }
8505ee5c14cSRichard Henderson 
8515ee5c14cSRichard Henderson void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
8525ee5c14cSRichard Henderson {
8535ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8545ee5c14cSRichard Henderson     intptr_t i;
8555ee5c14cSRichard Henderson 
8565ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
8575ee5c14cSRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
8585ee5c14cSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
8595ee5c14cSRichard Henderson     }
8605ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8615ee5c14cSRichard Henderson }
8625ee5c14cSRichard Henderson 
8635ee5c14cSRichard Henderson void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
8645ee5c14cSRichard Henderson {
8655ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8665ee5c14cSRichard Henderson     intptr_t i;
8675ee5c14cSRichard Henderson 
868899f08adSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
8695ee5c14cSRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
8705ee5c14cSRichard Henderson         *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
8715ee5c14cSRichard Henderson     }
8725ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8735ee5c14cSRichard Henderson }
8745ee5c14cSRichard Henderson 
8755ee5c14cSRichard Henderson void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
8765ee5c14cSRichard Henderson {
8775ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8785ee5c14cSRichard Henderson     intptr_t i;
8795ee5c14cSRichard Henderson 
8805ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
8815ee5c14cSRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
8825ee5c14cSRichard Henderson         *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
8835ee5c14cSRichard Henderson     }
8845ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8855ee5c14cSRichard Henderson }
8865ee5c14cSRichard Henderson 
8875ee5c14cSRichard Henderson void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
8885ee5c14cSRichard Henderson {
8895ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8905ee5c14cSRichard Henderson     intptr_t i;
8915ee5c14cSRichard Henderson 
892899f08adSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
8935ee5c14cSRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
8945ee5c14cSRichard Henderson         *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
8955ee5c14cSRichard Henderson     }
8965ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8975ee5c14cSRichard Henderson }
8985ee5c14cSRichard Henderson 
8995ee5c14cSRichard Henderson void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
9005ee5c14cSRichard Henderson {
9015ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
9025ee5c14cSRichard Henderson     intptr_t i;
9035ee5c14cSRichard Henderson 
904899f08adSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
9055ee5c14cSRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
9065ee5c14cSRichard Henderson         *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
9075ee5c14cSRichard Henderson     }
9085ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
9095ee5c14cSRichard Henderson }
9105ee5c14cSRichard Henderson 
911*5d0ceda9SRichard Henderson void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
912*5d0ceda9SRichard Henderson {
913*5d0ceda9SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
914*5d0ceda9SRichard Henderson     intptr_t i;
915*5d0ceda9SRichard Henderson 
916*5d0ceda9SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
917*5d0ceda9SRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
918*5d0ceda9SRichard Henderson         *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
919*5d0ceda9SRichard Henderson     }
920*5d0ceda9SRichard Henderson     clear_high(d, oprsz, desc);
921*5d0ceda9SRichard Henderson }
922*5d0ceda9SRichard Henderson 
923*5d0ceda9SRichard Henderson void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
924*5d0ceda9SRichard Henderson {
925*5d0ceda9SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
926*5d0ceda9SRichard Henderson     intptr_t i;
927*5d0ceda9SRichard Henderson 
928*5d0ceda9SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
929*5d0ceda9SRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
930*5d0ceda9SRichard Henderson         *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
931*5d0ceda9SRichard Henderson     }
932*5d0ceda9SRichard Henderson     clear_high(d, oprsz, desc);
933*5d0ceda9SRichard Henderson }
934*5d0ceda9SRichard Henderson 
935*5d0ceda9SRichard Henderson void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
936*5d0ceda9SRichard Henderson {
937*5d0ceda9SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
938*5d0ceda9SRichard Henderson     intptr_t i;
939*5d0ceda9SRichard Henderson 
940*5d0ceda9SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
941*5d0ceda9SRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
942*5d0ceda9SRichard Henderson         *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
943*5d0ceda9SRichard Henderson     }
944*5d0ceda9SRichard Henderson     clear_high(d, oprsz, desc);
945*5d0ceda9SRichard Henderson }
946*5d0ceda9SRichard Henderson 
947*5d0ceda9SRichard Henderson void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
948*5d0ceda9SRichard Henderson {
949*5d0ceda9SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
950*5d0ceda9SRichard Henderson     intptr_t i;
951*5d0ceda9SRichard Henderson 
952*5d0ceda9SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
953*5d0ceda9SRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
954*5d0ceda9SRichard Henderson         *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
955*5d0ceda9SRichard Henderson     }
956*5d0ceda9SRichard Henderson     clear_high(d, oprsz, desc);
957*5d0ceda9SRichard Henderson }
958*5d0ceda9SRichard Henderson 
959*5d0ceda9SRichard Henderson void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
960*5d0ceda9SRichard Henderson {
961*5d0ceda9SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
962*5d0ceda9SRichard Henderson     intptr_t i;
963*5d0ceda9SRichard Henderson 
964*5d0ceda9SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
965*5d0ceda9SRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
966*5d0ceda9SRichard Henderson         *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
967*5d0ceda9SRichard Henderson     }
968*5d0ceda9SRichard Henderson     clear_high(d, oprsz, desc);
969*5d0ceda9SRichard Henderson }
970*5d0ceda9SRichard Henderson 
971*5d0ceda9SRichard Henderson void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
972*5d0ceda9SRichard Henderson {
973*5d0ceda9SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
974*5d0ceda9SRichard Henderson     intptr_t i;
975*5d0ceda9SRichard Henderson 
976*5d0ceda9SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
977*5d0ceda9SRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
978*5d0ceda9SRichard Henderson         *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
979*5d0ceda9SRichard Henderson     }
980*5d0ceda9SRichard Henderson     clear_high(d, oprsz, desc);
981*5d0ceda9SRichard Henderson }
982*5d0ceda9SRichard Henderson 
983*5d0ceda9SRichard Henderson void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
984*5d0ceda9SRichard Henderson {
985*5d0ceda9SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
986*5d0ceda9SRichard Henderson     intptr_t i;
987*5d0ceda9SRichard Henderson 
988*5d0ceda9SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
989*5d0ceda9SRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
990*5d0ceda9SRichard Henderson         *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
991*5d0ceda9SRichard Henderson     }
992*5d0ceda9SRichard Henderson     clear_high(d, oprsz, desc);
993*5d0ceda9SRichard Henderson }
994*5d0ceda9SRichard Henderson 
995*5d0ceda9SRichard Henderson void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
996*5d0ceda9SRichard Henderson {
997*5d0ceda9SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
998*5d0ceda9SRichard Henderson     intptr_t i;
999*5d0ceda9SRichard Henderson 
1000*5d0ceda9SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1001*5d0ceda9SRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
1002*5d0ceda9SRichard Henderson         *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
1003*5d0ceda9SRichard Henderson     }
1004*5d0ceda9SRichard Henderson     clear_high(d, oprsz, desc);
1005*5d0ceda9SRichard Henderson }
1006*5d0ceda9SRichard Henderson 
1007212be173SRichard Henderson #define DO_CMP1(NAME, TYPE, OP)                                            \
1008212be173SRichard Henderson void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
1009212be173SRichard Henderson {                                                                          \
1010212be173SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);                                     \
1011212be173SRichard Henderson     intptr_t i;                                                            \
10126cb1d3b8SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(TYPE)) {                            \
10130270bd50SRichard Henderson         *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i));        \
1014212be173SRichard Henderson     }                                                                      \
1015212be173SRichard Henderson     clear_high(d, oprsz, desc);                                            \
1016212be173SRichard Henderson }
1017212be173SRichard Henderson 
1018212be173SRichard Henderson #define DO_CMP2(SZ) \
10196c7ab301SRichard Henderson     DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==)    \
10206c7ab301SRichard Henderson     DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=)    \
10216c7ab301SRichard Henderson     DO_CMP1(gvec_lt##SZ, int##SZ##_t, <)      \
10226c7ab301SRichard Henderson     DO_CMP1(gvec_le##SZ, int##SZ##_t, <=)     \
10236c7ab301SRichard Henderson     DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <)    \
10246c7ab301SRichard Henderson     DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
1025212be173SRichard Henderson 
1026212be173SRichard Henderson DO_CMP2(8)
1027212be173SRichard Henderson DO_CMP2(16)
1028212be173SRichard Henderson DO_CMP2(32)
1029212be173SRichard Henderson DO_CMP2(64)
1030212be173SRichard Henderson 
1031212be173SRichard Henderson #undef DO_CMP1
1032212be173SRichard Henderson #undef DO_CMP2
1033f49b12c6SRichard Henderson 
1034f49b12c6SRichard Henderson void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
1035f49b12c6SRichard Henderson {
1036f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1037f49b12c6SRichard Henderson     intptr_t i;
1038f49b12c6SRichard Henderson 
1039f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1040f49b12c6SRichard Henderson         int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
1041f49b12c6SRichard Henderson         if (r > INT8_MAX) {
1042f49b12c6SRichard Henderson             r = INT8_MAX;
1043f49b12c6SRichard Henderson         } else if (r < INT8_MIN) {
1044f49b12c6SRichard Henderson             r = INT8_MIN;
1045f49b12c6SRichard Henderson         }
1046f49b12c6SRichard Henderson         *(int8_t *)(d + i) = r;
1047f49b12c6SRichard Henderson     }
1048f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1049f49b12c6SRichard Henderson }
1050f49b12c6SRichard Henderson 
1051f49b12c6SRichard Henderson void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
1052f49b12c6SRichard Henderson {
1053f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1054f49b12c6SRichard Henderson     intptr_t i;
1055f49b12c6SRichard Henderson 
1056f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1057f49b12c6SRichard Henderson         int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
1058f49b12c6SRichard Henderson         if (r > INT16_MAX) {
1059f49b12c6SRichard Henderson             r = INT16_MAX;
1060f49b12c6SRichard Henderson         } else if (r < INT16_MIN) {
1061f49b12c6SRichard Henderson             r = INT16_MIN;
1062f49b12c6SRichard Henderson         }
1063f49b12c6SRichard Henderson         *(int16_t *)(d + i) = r;
1064f49b12c6SRichard Henderson     }
1065f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1066f49b12c6SRichard Henderson }
1067f49b12c6SRichard Henderson 
1068f49b12c6SRichard Henderson void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
1069f49b12c6SRichard Henderson {
1070f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1071f49b12c6SRichard Henderson     intptr_t i;
1072f49b12c6SRichard Henderson 
1073f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1074f49b12c6SRichard Henderson         int32_t ai = *(int32_t *)(a + i);
1075f49b12c6SRichard Henderson         int32_t bi = *(int32_t *)(b + i);
1076f49b12c6SRichard Henderson         int32_t di = ai + bi;
1077f49b12c6SRichard Henderson         if (((di ^ ai) &~ (ai ^ bi)) < 0) {
1078f49b12c6SRichard Henderson             /* Signed overflow.  */
1079f49b12c6SRichard Henderson             di = (di < 0 ? INT32_MAX : INT32_MIN);
1080f49b12c6SRichard Henderson         }
1081f49b12c6SRichard Henderson         *(int32_t *)(d + i) = di;
1082f49b12c6SRichard Henderson     }
1083f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1084f49b12c6SRichard Henderson }
1085f49b12c6SRichard Henderson 
1086f49b12c6SRichard Henderson void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
1087f49b12c6SRichard Henderson {
1088f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1089f49b12c6SRichard Henderson     intptr_t i;
1090f49b12c6SRichard Henderson 
1091f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1092f49b12c6SRichard Henderson         int64_t ai = *(int64_t *)(a + i);
1093f49b12c6SRichard Henderson         int64_t bi = *(int64_t *)(b + i);
1094f49b12c6SRichard Henderson         int64_t di = ai + bi;
1095f49b12c6SRichard Henderson         if (((di ^ ai) &~ (ai ^ bi)) < 0) {
1096f49b12c6SRichard Henderson             /* Signed overflow.  */
1097f49b12c6SRichard Henderson             di = (di < 0 ? INT64_MAX : INT64_MIN);
1098f49b12c6SRichard Henderson         }
1099f49b12c6SRichard Henderson         *(int64_t *)(d + i) = di;
1100f49b12c6SRichard Henderson     }
1101f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1102f49b12c6SRichard Henderson }
1103f49b12c6SRichard Henderson 
1104f49b12c6SRichard Henderson void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
1105f49b12c6SRichard Henderson {
1106f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1107f49b12c6SRichard Henderson     intptr_t i;
1108f49b12c6SRichard Henderson 
1109f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1110f49b12c6SRichard Henderson         int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
1111f49b12c6SRichard Henderson         if (r > INT8_MAX) {
1112f49b12c6SRichard Henderson             r = INT8_MAX;
1113f49b12c6SRichard Henderson         } else if (r < INT8_MIN) {
1114f49b12c6SRichard Henderson             r = INT8_MIN;
1115f49b12c6SRichard Henderson         }
1116f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
1117f49b12c6SRichard Henderson     }
1118f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1119f49b12c6SRichard Henderson }
1120f49b12c6SRichard Henderson 
1121f49b12c6SRichard Henderson void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
1122f49b12c6SRichard Henderson {
1123f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1124f49b12c6SRichard Henderson     intptr_t i;
1125f49b12c6SRichard Henderson 
1126f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1127f49b12c6SRichard Henderson         int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
1128f49b12c6SRichard Henderson         if (r > INT16_MAX) {
1129f49b12c6SRichard Henderson             r = INT16_MAX;
1130f49b12c6SRichard Henderson         } else if (r < INT16_MIN) {
1131f49b12c6SRichard Henderson             r = INT16_MIN;
1132f49b12c6SRichard Henderson         }
1133f49b12c6SRichard Henderson         *(int16_t *)(d + i) = r;
1134f49b12c6SRichard Henderson     }
1135f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1136f49b12c6SRichard Henderson }
1137f49b12c6SRichard Henderson 
1138f49b12c6SRichard Henderson void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
1139f49b12c6SRichard Henderson {
1140f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1141f49b12c6SRichard Henderson     intptr_t i;
1142f49b12c6SRichard Henderson 
1143f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1144f49b12c6SRichard Henderson         int32_t ai = *(int32_t *)(a + i);
1145f49b12c6SRichard Henderson         int32_t bi = *(int32_t *)(b + i);
1146f49b12c6SRichard Henderson         int32_t di = ai - bi;
1147f49b12c6SRichard Henderson         if (((di ^ ai) & (ai ^ bi)) < 0) {
1148f49b12c6SRichard Henderson             /* Signed overflow.  */
1149f49b12c6SRichard Henderson             di = (di < 0 ? INT32_MAX : INT32_MIN);
1150f49b12c6SRichard Henderson         }
1151f49b12c6SRichard Henderson         *(int32_t *)(d + i) = di;
1152f49b12c6SRichard Henderson     }
1153f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1154f49b12c6SRichard Henderson }
1155f49b12c6SRichard Henderson 
1156f49b12c6SRichard Henderson void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
1157f49b12c6SRichard Henderson {
1158f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1159f49b12c6SRichard Henderson     intptr_t i;
1160f49b12c6SRichard Henderson 
1161f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1162f49b12c6SRichard Henderson         int64_t ai = *(int64_t *)(a + i);
1163f49b12c6SRichard Henderson         int64_t bi = *(int64_t *)(b + i);
1164f49b12c6SRichard Henderson         int64_t di = ai - bi;
1165f49b12c6SRichard Henderson         if (((di ^ ai) & (ai ^ bi)) < 0) {
1166f49b12c6SRichard Henderson             /* Signed overflow.  */
1167f49b12c6SRichard Henderson             di = (di < 0 ? INT64_MAX : INT64_MIN);
1168f49b12c6SRichard Henderson         }
1169f49b12c6SRichard Henderson         *(int64_t *)(d + i) = di;
1170f49b12c6SRichard Henderson     }
1171f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1172f49b12c6SRichard Henderson }
1173f49b12c6SRichard Henderson 
1174f49b12c6SRichard Henderson void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
1175f49b12c6SRichard Henderson {
1176f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1177f49b12c6SRichard Henderson     intptr_t i;
1178f49b12c6SRichard Henderson 
1179f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1180f49b12c6SRichard Henderson         unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
1181f49b12c6SRichard Henderson         if (r > UINT8_MAX) {
1182f49b12c6SRichard Henderson             r = UINT8_MAX;
1183f49b12c6SRichard Henderson         }
1184f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
1185f49b12c6SRichard Henderson     }
1186f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1187f49b12c6SRichard Henderson }
1188f49b12c6SRichard Henderson 
1189f49b12c6SRichard Henderson void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
1190f49b12c6SRichard Henderson {
1191f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1192f49b12c6SRichard Henderson     intptr_t i;
1193f49b12c6SRichard Henderson 
1194f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1195f49b12c6SRichard Henderson         unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
1196f49b12c6SRichard Henderson         if (r > UINT16_MAX) {
1197f49b12c6SRichard Henderson             r = UINT16_MAX;
1198f49b12c6SRichard Henderson         }
1199f49b12c6SRichard Henderson         *(uint16_t *)(d + i) = r;
1200f49b12c6SRichard Henderson     }
1201f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1202f49b12c6SRichard Henderson }
1203f49b12c6SRichard Henderson 
1204f49b12c6SRichard Henderson void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
1205f49b12c6SRichard Henderson {
1206f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1207f49b12c6SRichard Henderson     intptr_t i;
1208f49b12c6SRichard Henderson 
1209f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1210f49b12c6SRichard Henderson         uint32_t ai = *(uint32_t *)(a + i);
1211f49b12c6SRichard Henderson         uint32_t bi = *(uint32_t *)(b + i);
1212f49b12c6SRichard Henderson         uint32_t di = ai + bi;
1213f49b12c6SRichard Henderson         if (di < ai) {
1214f49b12c6SRichard Henderson             di = UINT32_MAX;
1215f49b12c6SRichard Henderson         }
1216f49b12c6SRichard Henderson         *(uint32_t *)(d + i) = di;
1217f49b12c6SRichard Henderson     }
1218f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1219f49b12c6SRichard Henderson }
1220f49b12c6SRichard Henderson 
1221f49b12c6SRichard Henderson void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
1222f49b12c6SRichard Henderson {
1223f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1224f49b12c6SRichard Henderson     intptr_t i;
1225f49b12c6SRichard Henderson 
1226f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1227f49b12c6SRichard Henderson         uint64_t ai = *(uint64_t *)(a + i);
1228f49b12c6SRichard Henderson         uint64_t bi = *(uint64_t *)(b + i);
1229f49b12c6SRichard Henderson         uint64_t di = ai + bi;
1230f49b12c6SRichard Henderson         if (di < ai) {
1231f49b12c6SRichard Henderson             di = UINT64_MAX;
1232f49b12c6SRichard Henderson         }
1233f49b12c6SRichard Henderson         *(uint64_t *)(d + i) = di;
1234f49b12c6SRichard Henderson     }
1235f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1236f49b12c6SRichard Henderson }
1237f49b12c6SRichard Henderson 
1238f49b12c6SRichard Henderson void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
1239f49b12c6SRichard Henderson {
1240f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1241f49b12c6SRichard Henderson     intptr_t i;
1242f49b12c6SRichard Henderson 
1243f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1244f49b12c6SRichard Henderson         int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
1245f49b12c6SRichard Henderson         if (r < 0) {
1246f49b12c6SRichard Henderson             r = 0;
1247f49b12c6SRichard Henderson         }
1248f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
1249f49b12c6SRichard Henderson     }
1250f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1251f49b12c6SRichard Henderson }
1252f49b12c6SRichard Henderson 
1253f49b12c6SRichard Henderson void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
1254f49b12c6SRichard Henderson {
1255f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1256f49b12c6SRichard Henderson     intptr_t i;
1257f49b12c6SRichard Henderson 
1258f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1259f49b12c6SRichard Henderson         int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
1260f49b12c6SRichard Henderson         if (r < 0) {
1261f49b12c6SRichard Henderson             r = 0;
1262f49b12c6SRichard Henderson         }
1263f49b12c6SRichard Henderson         *(uint16_t *)(d + i) = r;
1264f49b12c6SRichard Henderson     }
1265f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1266f49b12c6SRichard Henderson }
1267f49b12c6SRichard Henderson 
1268f49b12c6SRichard Henderson void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
1269f49b12c6SRichard Henderson {
1270f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1271f49b12c6SRichard Henderson     intptr_t i;
1272f49b12c6SRichard Henderson 
1273f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1274f49b12c6SRichard Henderson         uint32_t ai = *(uint32_t *)(a + i);
1275f49b12c6SRichard Henderson         uint32_t bi = *(uint32_t *)(b + i);
1276f49b12c6SRichard Henderson         uint32_t di = ai - bi;
1277f49b12c6SRichard Henderson         if (ai < bi) {
1278f49b12c6SRichard Henderson             di = 0;
1279f49b12c6SRichard Henderson         }
1280f49b12c6SRichard Henderson         *(uint32_t *)(d + i) = di;
1281f49b12c6SRichard Henderson     }
1282f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1283f49b12c6SRichard Henderson }
1284f49b12c6SRichard Henderson 
1285f49b12c6SRichard Henderson void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1286f49b12c6SRichard Henderson {
1287f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1288f49b12c6SRichard Henderson     intptr_t i;
1289f49b12c6SRichard Henderson 
1290f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1291f49b12c6SRichard Henderson         uint64_t ai = *(uint64_t *)(a + i);
1292f49b12c6SRichard Henderson         uint64_t bi = *(uint64_t *)(b + i);
1293f49b12c6SRichard Henderson         uint64_t di = ai - bi;
1294f49b12c6SRichard Henderson         if (ai < bi) {
1295f49b12c6SRichard Henderson             di = 0;
1296f49b12c6SRichard Henderson         }
1297f49b12c6SRichard Henderson         *(uint64_t *)(d + i) = di;
1298f49b12c6SRichard Henderson     }
1299f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1300f49b12c6SRichard Henderson }
1301dd0a0fcdSRichard Henderson 
1302dd0a0fcdSRichard Henderson void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1303dd0a0fcdSRichard Henderson {
1304dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1305dd0a0fcdSRichard Henderson     intptr_t i;
1306dd0a0fcdSRichard Henderson 
1307dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1308dd0a0fcdSRichard Henderson         int8_t aa = *(int8_t *)(a + i);
1309dd0a0fcdSRichard Henderson         int8_t bb = *(int8_t *)(b + i);
1310dd0a0fcdSRichard Henderson         int8_t dd = aa < bb ? aa : bb;
1311dd0a0fcdSRichard Henderson         *(int8_t *)(d + i) = dd;
1312dd0a0fcdSRichard Henderson     }
1313dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1314dd0a0fcdSRichard Henderson }
1315dd0a0fcdSRichard Henderson 
1316dd0a0fcdSRichard Henderson void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1317dd0a0fcdSRichard Henderson {
1318dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1319dd0a0fcdSRichard Henderson     intptr_t i;
1320dd0a0fcdSRichard Henderson 
1321dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1322dd0a0fcdSRichard Henderson         int16_t aa = *(int16_t *)(a + i);
1323dd0a0fcdSRichard Henderson         int16_t bb = *(int16_t *)(b + i);
1324dd0a0fcdSRichard Henderson         int16_t dd = aa < bb ? aa : bb;
1325dd0a0fcdSRichard Henderson         *(int16_t *)(d + i) = dd;
1326dd0a0fcdSRichard Henderson     }
1327dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1328dd0a0fcdSRichard Henderson }
1329dd0a0fcdSRichard Henderson 
1330dd0a0fcdSRichard Henderson void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1331dd0a0fcdSRichard Henderson {
1332dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1333dd0a0fcdSRichard Henderson     intptr_t i;
1334dd0a0fcdSRichard Henderson 
1335dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1336dd0a0fcdSRichard Henderson         int32_t aa = *(int32_t *)(a + i);
1337dd0a0fcdSRichard Henderson         int32_t bb = *(int32_t *)(b + i);
1338dd0a0fcdSRichard Henderson         int32_t dd = aa < bb ? aa : bb;
1339dd0a0fcdSRichard Henderson         *(int32_t *)(d + i) = dd;
1340dd0a0fcdSRichard Henderson     }
1341dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1342dd0a0fcdSRichard Henderson }
1343dd0a0fcdSRichard Henderson 
1344dd0a0fcdSRichard Henderson void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1345dd0a0fcdSRichard Henderson {
1346dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1347dd0a0fcdSRichard Henderson     intptr_t i;
1348dd0a0fcdSRichard Henderson 
1349dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1350dd0a0fcdSRichard Henderson         int64_t aa = *(int64_t *)(a + i);
1351dd0a0fcdSRichard Henderson         int64_t bb = *(int64_t *)(b + i);
1352dd0a0fcdSRichard Henderson         int64_t dd = aa < bb ? aa : bb;
1353dd0a0fcdSRichard Henderson         *(int64_t *)(d + i) = dd;
1354dd0a0fcdSRichard Henderson     }
1355dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1356dd0a0fcdSRichard Henderson }
1357dd0a0fcdSRichard Henderson 
1358dd0a0fcdSRichard Henderson void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1359dd0a0fcdSRichard Henderson {
1360dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1361dd0a0fcdSRichard Henderson     intptr_t i;
1362dd0a0fcdSRichard Henderson 
1363dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1364dd0a0fcdSRichard Henderson         int8_t aa = *(int8_t *)(a + i);
1365dd0a0fcdSRichard Henderson         int8_t bb = *(int8_t *)(b + i);
1366dd0a0fcdSRichard Henderson         int8_t dd = aa > bb ? aa : bb;
1367dd0a0fcdSRichard Henderson         *(int8_t *)(d + i) = dd;
1368dd0a0fcdSRichard Henderson     }
1369dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1370dd0a0fcdSRichard Henderson }
1371dd0a0fcdSRichard Henderson 
1372dd0a0fcdSRichard Henderson void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1373dd0a0fcdSRichard Henderson {
1374dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1375dd0a0fcdSRichard Henderson     intptr_t i;
1376dd0a0fcdSRichard Henderson 
1377dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1378dd0a0fcdSRichard Henderson         int16_t aa = *(int16_t *)(a + i);
1379dd0a0fcdSRichard Henderson         int16_t bb = *(int16_t *)(b + i);
1380dd0a0fcdSRichard Henderson         int16_t dd = aa > bb ? aa : bb;
1381dd0a0fcdSRichard Henderson         *(int16_t *)(d + i) = dd;
1382dd0a0fcdSRichard Henderson     }
1383dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1384dd0a0fcdSRichard Henderson }
1385dd0a0fcdSRichard Henderson 
1386dd0a0fcdSRichard Henderson void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1387dd0a0fcdSRichard Henderson {
1388dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1389dd0a0fcdSRichard Henderson     intptr_t i;
1390dd0a0fcdSRichard Henderson 
1391dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1392dd0a0fcdSRichard Henderson         int32_t aa = *(int32_t *)(a + i);
1393dd0a0fcdSRichard Henderson         int32_t bb = *(int32_t *)(b + i);
1394dd0a0fcdSRichard Henderson         int32_t dd = aa > bb ? aa : bb;
1395dd0a0fcdSRichard Henderson         *(int32_t *)(d + i) = dd;
1396dd0a0fcdSRichard Henderson     }
1397dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1398dd0a0fcdSRichard Henderson }
1399dd0a0fcdSRichard Henderson 
1400dd0a0fcdSRichard Henderson void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1401dd0a0fcdSRichard Henderson {
1402dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1403dd0a0fcdSRichard Henderson     intptr_t i;
1404dd0a0fcdSRichard Henderson 
1405dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1406dd0a0fcdSRichard Henderson         int64_t aa = *(int64_t *)(a + i);
1407dd0a0fcdSRichard Henderson         int64_t bb = *(int64_t *)(b + i);
1408dd0a0fcdSRichard Henderson         int64_t dd = aa > bb ? aa : bb;
1409dd0a0fcdSRichard Henderson         *(int64_t *)(d + i) = dd;
1410dd0a0fcdSRichard Henderson     }
1411dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1412dd0a0fcdSRichard Henderson }
1413dd0a0fcdSRichard Henderson 
1414dd0a0fcdSRichard Henderson void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1415dd0a0fcdSRichard Henderson {
1416dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1417dd0a0fcdSRichard Henderson     intptr_t i;
1418dd0a0fcdSRichard Henderson 
1419dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1420dd0a0fcdSRichard Henderson         uint8_t aa = *(uint8_t *)(a + i);
1421dd0a0fcdSRichard Henderson         uint8_t bb = *(uint8_t *)(b + i);
1422dd0a0fcdSRichard Henderson         uint8_t dd = aa < bb ? aa : bb;
1423dd0a0fcdSRichard Henderson         *(uint8_t *)(d + i) = dd;
1424dd0a0fcdSRichard Henderson     }
1425dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1426dd0a0fcdSRichard Henderson }
1427dd0a0fcdSRichard Henderson 
1428dd0a0fcdSRichard Henderson void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1429dd0a0fcdSRichard Henderson {
1430dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1431dd0a0fcdSRichard Henderson     intptr_t i;
1432dd0a0fcdSRichard Henderson 
1433dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1434dd0a0fcdSRichard Henderson         uint16_t aa = *(uint16_t *)(a + i);
1435dd0a0fcdSRichard Henderson         uint16_t bb = *(uint16_t *)(b + i);
1436dd0a0fcdSRichard Henderson         uint16_t dd = aa < bb ? aa : bb;
1437dd0a0fcdSRichard Henderson         *(uint16_t *)(d + i) = dd;
1438dd0a0fcdSRichard Henderson     }
1439dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1440dd0a0fcdSRichard Henderson }
1441dd0a0fcdSRichard Henderson 
1442dd0a0fcdSRichard Henderson void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1443dd0a0fcdSRichard Henderson {
1444dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1445dd0a0fcdSRichard Henderson     intptr_t i;
1446dd0a0fcdSRichard Henderson 
1447dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1448dd0a0fcdSRichard Henderson         uint32_t aa = *(uint32_t *)(a + i);
1449dd0a0fcdSRichard Henderson         uint32_t bb = *(uint32_t *)(b + i);
1450dd0a0fcdSRichard Henderson         uint32_t dd = aa < bb ? aa : bb;
1451dd0a0fcdSRichard Henderson         *(uint32_t *)(d + i) = dd;
1452dd0a0fcdSRichard Henderson     }
1453dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1454dd0a0fcdSRichard Henderson }
1455dd0a0fcdSRichard Henderson 
1456dd0a0fcdSRichard Henderson void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1457dd0a0fcdSRichard Henderson {
1458dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1459dd0a0fcdSRichard Henderson     intptr_t i;
1460dd0a0fcdSRichard Henderson 
1461dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1462dd0a0fcdSRichard Henderson         uint64_t aa = *(uint64_t *)(a + i);
1463dd0a0fcdSRichard Henderson         uint64_t bb = *(uint64_t *)(b + i);
1464dd0a0fcdSRichard Henderson         uint64_t dd = aa < bb ? aa : bb;
1465dd0a0fcdSRichard Henderson         *(uint64_t *)(d + i) = dd;
1466dd0a0fcdSRichard Henderson     }
1467dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1468dd0a0fcdSRichard Henderson }
1469dd0a0fcdSRichard Henderson 
1470dd0a0fcdSRichard Henderson void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1471dd0a0fcdSRichard Henderson {
1472dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1473dd0a0fcdSRichard Henderson     intptr_t i;
1474dd0a0fcdSRichard Henderson 
1475dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1476dd0a0fcdSRichard Henderson         uint8_t aa = *(uint8_t *)(a + i);
1477dd0a0fcdSRichard Henderson         uint8_t bb = *(uint8_t *)(b + i);
1478dd0a0fcdSRichard Henderson         uint8_t dd = aa > bb ? aa : bb;
1479dd0a0fcdSRichard Henderson         *(uint8_t *)(d + i) = dd;
1480dd0a0fcdSRichard Henderson     }
1481dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1482dd0a0fcdSRichard Henderson }
1483dd0a0fcdSRichard Henderson 
1484dd0a0fcdSRichard Henderson void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1485dd0a0fcdSRichard Henderson {
1486dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1487dd0a0fcdSRichard Henderson     intptr_t i;
1488dd0a0fcdSRichard Henderson 
1489dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1490dd0a0fcdSRichard Henderson         uint16_t aa = *(uint16_t *)(a + i);
1491dd0a0fcdSRichard Henderson         uint16_t bb = *(uint16_t *)(b + i);
1492dd0a0fcdSRichard Henderson         uint16_t dd = aa > bb ? aa : bb;
1493dd0a0fcdSRichard Henderson         *(uint16_t *)(d + i) = dd;
1494dd0a0fcdSRichard Henderson     }
1495dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1496dd0a0fcdSRichard Henderson }
1497dd0a0fcdSRichard Henderson 
1498dd0a0fcdSRichard Henderson void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1499dd0a0fcdSRichard Henderson {
1500dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1501dd0a0fcdSRichard Henderson     intptr_t i;
1502dd0a0fcdSRichard Henderson 
1503dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1504dd0a0fcdSRichard Henderson         uint32_t aa = *(uint32_t *)(a + i);
1505dd0a0fcdSRichard Henderson         uint32_t bb = *(uint32_t *)(b + i);
1506dd0a0fcdSRichard Henderson         uint32_t dd = aa > bb ? aa : bb;
1507dd0a0fcdSRichard Henderson         *(uint32_t *)(d + i) = dd;
1508dd0a0fcdSRichard Henderson     }
1509dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1510dd0a0fcdSRichard Henderson }
1511dd0a0fcdSRichard Henderson 
1512dd0a0fcdSRichard Henderson void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1513dd0a0fcdSRichard Henderson {
1514dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1515dd0a0fcdSRichard Henderson     intptr_t i;
1516dd0a0fcdSRichard Henderson 
1517dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1518dd0a0fcdSRichard Henderson         uint64_t aa = *(uint64_t *)(a + i);
1519dd0a0fcdSRichard Henderson         uint64_t bb = *(uint64_t *)(b + i);
1520dd0a0fcdSRichard Henderson         uint64_t dd = aa > bb ? aa : bb;
1521dd0a0fcdSRichard Henderson         *(uint64_t *)(d + i) = dd;
1522dd0a0fcdSRichard Henderson     }
1523dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1524dd0a0fcdSRichard Henderson }
152538dc1294SRichard Henderson 
152638dc1294SRichard Henderson void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
152738dc1294SRichard Henderson {
152838dc1294SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
152938dc1294SRichard Henderson     intptr_t i;
153038dc1294SRichard Henderson 
15316c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
15326c7ab301SRichard Henderson         uint64_t aa = *(uint64_t *)(a + i);
15336c7ab301SRichard Henderson         uint64_t bb = *(uint64_t *)(b + i);
15346c7ab301SRichard Henderson         uint64_t cc = *(uint64_t *)(c + i);
15356c7ab301SRichard Henderson         *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
153638dc1294SRichard Henderson     }
153738dc1294SRichard Henderson     clear_high(d, oprsz, desc);
153838dc1294SRichard Henderson }
1539