xref: /qemu/accel/tcg/tcg-runtime-gvec.c (revision b0f7e7444c03da17e41bf327c8aea590104a28ab)
1db432672SRichard Henderson /*
2db432672SRichard Henderson  * Generic vectorized operation runtime
3db432672SRichard Henderson  *
4db432672SRichard Henderson  * Copyright (c) 2018 Linaro
5db432672SRichard Henderson  *
6db432672SRichard Henderson  * This library is free software; you can redistribute it and/or
7db432672SRichard Henderson  * modify it under the terms of the GNU Lesser General Public
8db432672SRichard Henderson  * License as published by the Free Software Foundation; either
9fb0343d5SThomas Huth  * version 2.1 of the License, or (at your option) any later version.
10db432672SRichard Henderson  *
11db432672SRichard Henderson  * This library is distributed in the hope that it will be useful,
12db432672SRichard Henderson  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13db432672SRichard Henderson  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14db432672SRichard Henderson  * Lesser General Public License for more details.
15db432672SRichard Henderson  *
16db432672SRichard Henderson  * You should have received a copy of the GNU Lesser General Public
17db432672SRichard Henderson  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18db432672SRichard Henderson  */
19db432672SRichard Henderson 
20db432672SRichard Henderson #include "qemu/osdep.h"
21db432672SRichard Henderson #include "qemu/host-utils.h"
22db432672SRichard Henderson #include "cpu.h"
23db432672SRichard Henderson #include "exec/helper-proto.h"
24dcb32f1dSPhilippe Mathieu-Daudé #include "tcg/tcg-gvec-desc.h"
25db432672SRichard Henderson 
26db432672SRichard Henderson 
/*
 * Zero the tail of a vector destination.
 *
 * If simd_maxsz(desc) is larger than @oprsz, store 64-bit zeros into @d
 * at byte offsets oprsz, oprsz + 8, ... for every offset below
 * simd_maxsz(desc).  Nothing is written otherwise.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    const intptr_t limit = simd_maxsz(desc);

    if (unlikely(oprsz < limit)) {
        intptr_t ofs = oprsz;

        /* The guard above guarantees at least one store is needed. */
        do {
            *(uint64_t *)(d + ofs) = 0;
            ofs += sizeof(uint64_t);
        } while (ofs < limit);
    }
}
38db432672SRichard Henderson 
39db432672SRichard Henderson void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
40db432672SRichard Henderson {
41db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
42db432672SRichard Henderson     intptr_t i;
43db432672SRichard Henderson 
446c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
456c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
46db432672SRichard Henderson     }
47db432672SRichard Henderson     clear_high(d, oprsz, desc);
48db432672SRichard Henderson }
49db432672SRichard Henderson 
50db432672SRichard Henderson void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
51db432672SRichard Henderson {
52db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
53db432672SRichard Henderson     intptr_t i;
54db432672SRichard Henderson 
556c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
566c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
57db432672SRichard Henderson     }
58db432672SRichard Henderson     clear_high(d, oprsz, desc);
59db432672SRichard Henderson }
60db432672SRichard Henderson 
61db432672SRichard Henderson void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
62db432672SRichard Henderson {
63db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
64db432672SRichard Henderson     intptr_t i;
65db432672SRichard Henderson 
666c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
676c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
68db432672SRichard Henderson     }
69db432672SRichard Henderson     clear_high(d, oprsz, desc);
70db432672SRichard Henderson }
71db432672SRichard Henderson 
72db432672SRichard Henderson void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
73db432672SRichard Henderson {
74db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
75db432672SRichard Henderson     intptr_t i;
76db432672SRichard Henderson 
776c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
786c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
79db432672SRichard Henderson     }
80db432672SRichard Henderson     clear_high(d, oprsz, desc);
81db432672SRichard Henderson }
82db432672SRichard Henderson 
8322fc3527SRichard Henderson void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
8422fc3527SRichard Henderson {
8522fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8622fc3527SRichard Henderson     intptr_t i;
8722fc3527SRichard Henderson 
886c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
890a83e43aSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b;
9022fc3527SRichard Henderson     }
9122fc3527SRichard Henderson     clear_high(d, oprsz, desc);
9222fc3527SRichard Henderson }
9322fc3527SRichard Henderson 
9422fc3527SRichard Henderson void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
9522fc3527SRichard Henderson {
9622fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
9722fc3527SRichard Henderson     intptr_t i;
9822fc3527SRichard Henderson 
996c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1000a83e43aSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b;
10122fc3527SRichard Henderson     }
10222fc3527SRichard Henderson     clear_high(d, oprsz, desc);
10322fc3527SRichard Henderson }
10422fc3527SRichard Henderson 
10522fc3527SRichard Henderson void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
10622fc3527SRichard Henderson {
10722fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
10822fc3527SRichard Henderson     intptr_t i;
10922fc3527SRichard Henderson 
1106c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1110a83e43aSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b;
11222fc3527SRichard Henderson     }
11322fc3527SRichard Henderson     clear_high(d, oprsz, desc);
11422fc3527SRichard Henderson }
11522fc3527SRichard Henderson 
11622fc3527SRichard Henderson void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
11722fc3527SRichard Henderson {
11822fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
11922fc3527SRichard Henderson     intptr_t i;
12022fc3527SRichard Henderson 
1216c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1220a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b;
12322fc3527SRichard Henderson     }
12422fc3527SRichard Henderson     clear_high(d, oprsz, desc);
12522fc3527SRichard Henderson }
12622fc3527SRichard Henderson 
127db432672SRichard Henderson void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
128db432672SRichard Henderson {
129db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
130db432672SRichard Henderson     intptr_t i;
131db432672SRichard Henderson 
1326c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1336c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
134db432672SRichard Henderson     }
135db432672SRichard Henderson     clear_high(d, oprsz, desc);
136db432672SRichard Henderson }
137db432672SRichard Henderson 
138db432672SRichard Henderson void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
139db432672SRichard Henderson {
140db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
141db432672SRichard Henderson     intptr_t i;
142db432672SRichard Henderson 
1436c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1446c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
145db432672SRichard Henderson     }
146db432672SRichard Henderson     clear_high(d, oprsz, desc);
147db432672SRichard Henderson }
148db432672SRichard Henderson 
149db432672SRichard Henderson void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
150db432672SRichard Henderson {
151db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
152db432672SRichard Henderson     intptr_t i;
153db432672SRichard Henderson 
1546c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1556c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
156db432672SRichard Henderson     }
157db432672SRichard Henderson     clear_high(d, oprsz, desc);
158db432672SRichard Henderson }
159db432672SRichard Henderson 
160db432672SRichard Henderson void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
161db432672SRichard Henderson {
162db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
163db432672SRichard Henderson     intptr_t i;
164db432672SRichard Henderson 
1656c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1666c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
167db432672SRichard Henderson     }
168db432672SRichard Henderson     clear_high(d, oprsz, desc);
169db432672SRichard Henderson }
170db432672SRichard Henderson 
17122fc3527SRichard Henderson void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
17222fc3527SRichard Henderson {
17322fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
17422fc3527SRichard Henderson     intptr_t i;
17522fc3527SRichard Henderson 
1766c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1770a83e43aSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b;
17822fc3527SRichard Henderson     }
17922fc3527SRichard Henderson     clear_high(d, oprsz, desc);
18022fc3527SRichard Henderson }
18122fc3527SRichard Henderson 
18222fc3527SRichard Henderson void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
18322fc3527SRichard Henderson {
18422fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
18522fc3527SRichard Henderson     intptr_t i;
18622fc3527SRichard Henderson 
1876c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1880a83e43aSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b;
18922fc3527SRichard Henderson     }
19022fc3527SRichard Henderson     clear_high(d, oprsz, desc);
19122fc3527SRichard Henderson }
19222fc3527SRichard Henderson 
19322fc3527SRichard Henderson void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
19422fc3527SRichard Henderson {
19522fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
19622fc3527SRichard Henderson     intptr_t i;
19722fc3527SRichard Henderson 
1986c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1990a83e43aSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b;
20022fc3527SRichard Henderson     }
20122fc3527SRichard Henderson     clear_high(d, oprsz, desc);
20222fc3527SRichard Henderson }
20322fc3527SRichard Henderson 
20422fc3527SRichard Henderson void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
20522fc3527SRichard Henderson {
20622fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
20722fc3527SRichard Henderson     intptr_t i;
20822fc3527SRichard Henderson 
2096c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
2100a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b;
21122fc3527SRichard Henderson     }
21222fc3527SRichard Henderson     clear_high(d, oprsz, desc);
21322fc3527SRichard Henderson }
21422fc3527SRichard Henderson 
2153774030aSRichard Henderson void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
2163774030aSRichard Henderson {
2173774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2183774030aSRichard Henderson     intptr_t i;
2193774030aSRichard Henderson 
2206c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
2216c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
2223774030aSRichard Henderson     }
2233774030aSRichard Henderson     clear_high(d, oprsz, desc);
2243774030aSRichard Henderson }
2253774030aSRichard Henderson 
2263774030aSRichard Henderson void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
2273774030aSRichard Henderson {
2283774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2293774030aSRichard Henderson     intptr_t i;
2303774030aSRichard Henderson 
2316c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
2326c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
2333774030aSRichard Henderson     }
2343774030aSRichard Henderson     clear_high(d, oprsz, desc);
2353774030aSRichard Henderson }
2363774030aSRichard Henderson 
2373774030aSRichard Henderson void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
2383774030aSRichard Henderson {
2393774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2403774030aSRichard Henderson     intptr_t i;
2413774030aSRichard Henderson 
2426c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
2436c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
2443774030aSRichard Henderson     }
2453774030aSRichard Henderson     clear_high(d, oprsz, desc);
2463774030aSRichard Henderson }
2473774030aSRichard Henderson 
2483774030aSRichard Henderson void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
2493774030aSRichard Henderson {
2503774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2513774030aSRichard Henderson     intptr_t i;
2523774030aSRichard Henderson 
2536c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
2546c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
2553774030aSRichard Henderson     }
2563774030aSRichard Henderson     clear_high(d, oprsz, desc);
2573774030aSRichard Henderson }
2583774030aSRichard Henderson 
25922fc3527SRichard Henderson void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
26022fc3527SRichard Henderson {
26122fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
26222fc3527SRichard Henderson     intptr_t i;
26322fc3527SRichard Henderson 
2646c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
2650a83e43aSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b;
26622fc3527SRichard Henderson     }
26722fc3527SRichard Henderson     clear_high(d, oprsz, desc);
26822fc3527SRichard Henderson }
26922fc3527SRichard Henderson 
27022fc3527SRichard Henderson void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
27122fc3527SRichard Henderson {
27222fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
27322fc3527SRichard Henderson     intptr_t i;
27422fc3527SRichard Henderson 
2756c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
2760a83e43aSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b;
27722fc3527SRichard Henderson     }
27822fc3527SRichard Henderson     clear_high(d, oprsz, desc);
27922fc3527SRichard Henderson }
28022fc3527SRichard Henderson 
28122fc3527SRichard Henderson void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
28222fc3527SRichard Henderson {
28322fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
28422fc3527SRichard Henderson     intptr_t i;
28522fc3527SRichard Henderson 
2866c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
2870a83e43aSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b;
28822fc3527SRichard Henderson     }
28922fc3527SRichard Henderson     clear_high(d, oprsz, desc);
29022fc3527SRichard Henderson }
29122fc3527SRichard Henderson 
29222fc3527SRichard Henderson void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
29322fc3527SRichard Henderson {
29422fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
29522fc3527SRichard Henderson     intptr_t i;
29622fc3527SRichard Henderson 
2976c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
2980a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b;
29922fc3527SRichard Henderson     }
30022fc3527SRichard Henderson     clear_high(d, oprsz, desc);
30122fc3527SRichard Henderson }
30222fc3527SRichard Henderson 
303db432672SRichard Henderson void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
304db432672SRichard Henderson {
305db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
306db432672SRichard Henderson     intptr_t i;
307db432672SRichard Henderson 
3086c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
3096c7ab301SRichard Henderson         *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
310db432672SRichard Henderson     }
311db432672SRichard Henderson     clear_high(d, oprsz, desc);
312db432672SRichard Henderson }
313db432672SRichard Henderson 
314db432672SRichard Henderson void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
315db432672SRichard Henderson {
316db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
317db432672SRichard Henderson     intptr_t i;
318db432672SRichard Henderson 
3196c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
3206c7ab301SRichard Henderson         *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
321db432672SRichard Henderson     }
322db432672SRichard Henderson     clear_high(d, oprsz, desc);
323db432672SRichard Henderson }
324db432672SRichard Henderson 
325db432672SRichard Henderson void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
326db432672SRichard Henderson {
327db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
328db432672SRichard Henderson     intptr_t i;
329db432672SRichard Henderson 
3306c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
3316c7ab301SRichard Henderson         *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
332db432672SRichard Henderson     }
333db432672SRichard Henderson     clear_high(d, oprsz, desc);
334db432672SRichard Henderson }
335db432672SRichard Henderson 
336db432672SRichard Henderson void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
337db432672SRichard Henderson {
338db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
339db432672SRichard Henderson     intptr_t i;
340db432672SRichard Henderson 
3416c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
3426c7ab301SRichard Henderson         *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
343db432672SRichard Henderson     }
344db432672SRichard Henderson     clear_high(d, oprsz, desc);
345db432672SRichard Henderson }
346db432672SRichard Henderson 
347bcefc902SRichard Henderson void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
348bcefc902SRichard Henderson {
349bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
350bcefc902SRichard Henderson     intptr_t i;
351bcefc902SRichard Henderson 
352bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
353bcefc902SRichard Henderson         int8_t aa = *(int8_t *)(a + i);
354bcefc902SRichard Henderson         *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
355bcefc902SRichard Henderson     }
356bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
357bcefc902SRichard Henderson }
358bcefc902SRichard Henderson 
359bcefc902SRichard Henderson void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
360bcefc902SRichard Henderson {
361bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
362bcefc902SRichard Henderson     intptr_t i;
363bcefc902SRichard Henderson 
364bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
365bcefc902SRichard Henderson         int16_t aa = *(int16_t *)(a + i);
366bcefc902SRichard Henderson         *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
367bcefc902SRichard Henderson     }
368bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
369bcefc902SRichard Henderson }
370bcefc902SRichard Henderson 
371bcefc902SRichard Henderson void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
372bcefc902SRichard Henderson {
373bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
374bcefc902SRichard Henderson     intptr_t i;
375bcefc902SRichard Henderson 
376bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
377bcefc902SRichard Henderson         int32_t aa = *(int32_t *)(a + i);
378bcefc902SRichard Henderson         *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
379bcefc902SRichard Henderson     }
380bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
381bcefc902SRichard Henderson }
382bcefc902SRichard Henderson 
383bcefc902SRichard Henderson void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
384bcefc902SRichard Henderson {
385bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
386bcefc902SRichard Henderson     intptr_t i;
387bcefc902SRichard Henderson 
388bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
389bcefc902SRichard Henderson         int64_t aa = *(int64_t *)(a + i);
390bcefc902SRichard Henderson         *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
391bcefc902SRichard Henderson     }
392bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
393bcefc902SRichard Henderson }
394bcefc902SRichard Henderson 
395db432672SRichard Henderson void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
396db432672SRichard Henderson {
397db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
398db432672SRichard Henderson 
399db432672SRichard Henderson     memcpy(d, a, oprsz);
400db432672SRichard Henderson     clear_high(d, oprsz, desc);
401db432672SRichard Henderson }
402db432672SRichard Henderson 
403db432672SRichard Henderson void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
404db432672SRichard Henderson {
405db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
406db432672SRichard Henderson     intptr_t i;
407db432672SRichard Henderson 
408db432672SRichard Henderson     if (c == 0) {
409db432672SRichard Henderson         oprsz = 0;
410db432672SRichard Henderson     } else {
411db432672SRichard Henderson         for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
412db432672SRichard Henderson             *(uint64_t *)(d + i) = c;
413db432672SRichard Henderson         }
414db432672SRichard Henderson     }
415db432672SRichard Henderson     clear_high(d, oprsz, desc);
416db432672SRichard Henderson }
417db432672SRichard Henderson 
418db432672SRichard Henderson void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
419db432672SRichard Henderson {
420db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
421db432672SRichard Henderson     intptr_t i;
422db432672SRichard Henderson 
423db432672SRichard Henderson     if (c == 0) {
424db432672SRichard Henderson         oprsz = 0;
425db432672SRichard Henderson     } else {
426db432672SRichard Henderson         for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
427db432672SRichard Henderson             *(uint32_t *)(d + i) = c;
428db432672SRichard Henderson         }
429db432672SRichard Henderson     }
430db432672SRichard Henderson     clear_high(d, oprsz, desc);
431db432672SRichard Henderson }
432db432672SRichard Henderson 
433db432672SRichard Henderson void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
434db432672SRichard Henderson {
435db432672SRichard Henderson     HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
436db432672SRichard Henderson }
437db432672SRichard Henderson 
438db432672SRichard Henderson void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
439db432672SRichard Henderson {
440db432672SRichard Henderson     HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
441db432672SRichard Henderson }
442db432672SRichard Henderson 
443db432672SRichard Henderson void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
444db432672SRichard Henderson {
445db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
446db432672SRichard Henderson     intptr_t i;
447db432672SRichard Henderson 
4486c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4496c7ab301SRichard Henderson         *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
450db432672SRichard Henderson     }
451db432672SRichard Henderson     clear_high(d, oprsz, desc);
452db432672SRichard Henderson }
453db432672SRichard Henderson 
454db432672SRichard Henderson void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
455db432672SRichard Henderson {
456db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
457db432672SRichard Henderson     intptr_t i;
458db432672SRichard Henderson 
4596c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4606c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
461db432672SRichard Henderson     }
462db432672SRichard Henderson     clear_high(d, oprsz, desc);
463db432672SRichard Henderson }
464db432672SRichard Henderson 
465db432672SRichard Henderson void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
466db432672SRichard Henderson {
467db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
468db432672SRichard Henderson     intptr_t i;
469db432672SRichard Henderson 
4706c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4716c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
472db432672SRichard Henderson     }
473db432672SRichard Henderson     clear_high(d, oprsz, desc);
474db432672SRichard Henderson }
475db432672SRichard Henderson 
476db432672SRichard Henderson void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
477db432672SRichard Henderson {
478db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
479db432672SRichard Henderson     intptr_t i;
480db432672SRichard Henderson 
4816c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4826c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
483db432672SRichard Henderson     }
484db432672SRichard Henderson     clear_high(d, oprsz, desc);
485db432672SRichard Henderson }
486db432672SRichard Henderson 
487db432672SRichard Henderson void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
488db432672SRichard Henderson {
489db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
490db432672SRichard Henderson     intptr_t i;
491db432672SRichard Henderson 
4926c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4936c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
494db432672SRichard Henderson     }
495db432672SRichard Henderson     clear_high(d, oprsz, desc);
496db432672SRichard Henderson }
497db432672SRichard Henderson 
498db432672SRichard Henderson void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
499db432672SRichard Henderson {
500db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
501db432672SRichard Henderson     intptr_t i;
502db432672SRichard Henderson 
5036c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5046c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
505db432672SRichard Henderson     }
506db432672SRichard Henderson     clear_high(d, oprsz, desc);
507db432672SRichard Henderson }
508d0ec9796SRichard Henderson 
509f550805dSRichard Henderson void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
510f550805dSRichard Henderson {
511f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
512f550805dSRichard Henderson     intptr_t i;
513f550805dSRichard Henderson 
5146c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5156c7ab301SRichard Henderson         *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
516f550805dSRichard Henderson     }
517f550805dSRichard Henderson     clear_high(d, oprsz, desc);
518f550805dSRichard Henderson }
519f550805dSRichard Henderson 
520f550805dSRichard Henderson void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
521f550805dSRichard Henderson {
522f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
523f550805dSRichard Henderson     intptr_t i;
524f550805dSRichard Henderson 
5256c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5266c7ab301SRichard Henderson         *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
527f550805dSRichard Henderson     }
528f550805dSRichard Henderson     clear_high(d, oprsz, desc);
529f550805dSRichard Henderson }
530f550805dSRichard Henderson 
531f550805dSRichard Henderson void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
532f550805dSRichard Henderson {
533f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
534f550805dSRichard Henderson     intptr_t i;
535f550805dSRichard Henderson 
5366c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5376c7ab301SRichard Henderson         *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
538f550805dSRichard Henderson     }
539f550805dSRichard Henderson     clear_high(d, oprsz, desc);
540f550805dSRichard Henderson }
541f550805dSRichard Henderson 
54222fc3527SRichard Henderson void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
54322fc3527SRichard Henderson {
54422fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
54522fc3527SRichard Henderson     intptr_t i;
54622fc3527SRichard Henderson 
5476c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5480a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b;
54922fc3527SRichard Henderson     }
55022fc3527SRichard Henderson     clear_high(d, oprsz, desc);
55122fc3527SRichard Henderson }
55222fc3527SRichard Henderson 
55322fc3527SRichard Henderson void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
55422fc3527SRichard Henderson {
55522fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
55622fc3527SRichard Henderson     intptr_t i;
55722fc3527SRichard Henderson 
5586c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5590a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b;
56022fc3527SRichard Henderson     }
56122fc3527SRichard Henderson     clear_high(d, oprsz, desc);
56222fc3527SRichard Henderson }
56322fc3527SRichard Henderson 
56422fc3527SRichard Henderson void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
56522fc3527SRichard Henderson {
56622fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
56722fc3527SRichard Henderson     intptr_t i;
56822fc3527SRichard Henderson 
5696c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5700a83e43aSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b;
57122fc3527SRichard Henderson     }
57222fc3527SRichard Henderson     clear_high(d, oprsz, desc);
57322fc3527SRichard Henderson }
57422fc3527SRichard Henderson 
575d0ec9796SRichard Henderson void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
576d0ec9796SRichard Henderson {
577d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
578d0ec9796SRichard Henderson     int shift = simd_data(desc);
579d0ec9796SRichard Henderson     intptr_t i;
580d0ec9796SRichard Henderson 
5816c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
5826c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
583d0ec9796SRichard Henderson     }
584d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
585d0ec9796SRichard Henderson }
586d0ec9796SRichard Henderson 
587d0ec9796SRichard Henderson void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
588d0ec9796SRichard Henderson {
589d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
590d0ec9796SRichard Henderson     int shift = simd_data(desc);
591d0ec9796SRichard Henderson     intptr_t i;
592d0ec9796SRichard Henderson 
5936c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
5946c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
595d0ec9796SRichard Henderson     }
596d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
597d0ec9796SRichard Henderson }
598d0ec9796SRichard Henderson 
599d0ec9796SRichard Henderson void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
600d0ec9796SRichard Henderson {
601d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
602d0ec9796SRichard Henderson     int shift = simd_data(desc);
603d0ec9796SRichard Henderson     intptr_t i;
604d0ec9796SRichard Henderson 
6056c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
6066c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
607d0ec9796SRichard Henderson     }
608d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
609d0ec9796SRichard Henderson }
610d0ec9796SRichard Henderson 
611d0ec9796SRichard Henderson void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
612d0ec9796SRichard Henderson {
613d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
614d0ec9796SRichard Henderson     int shift = simd_data(desc);
615d0ec9796SRichard Henderson     intptr_t i;
616d0ec9796SRichard Henderson 
6176c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
6186c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
619d0ec9796SRichard Henderson     }
620d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
621d0ec9796SRichard Henderson }
622d0ec9796SRichard Henderson 
623d0ec9796SRichard Henderson void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
624d0ec9796SRichard Henderson {
625d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
626d0ec9796SRichard Henderson     int shift = simd_data(desc);
627d0ec9796SRichard Henderson     intptr_t i;
628d0ec9796SRichard Henderson 
6296c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
6306c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
631d0ec9796SRichard Henderson     }
632d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
633d0ec9796SRichard Henderson }
634d0ec9796SRichard Henderson 
635d0ec9796SRichard Henderson void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
636d0ec9796SRichard Henderson {
637d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
638d0ec9796SRichard Henderson     int shift = simd_data(desc);
639d0ec9796SRichard Henderson     intptr_t i;
640d0ec9796SRichard Henderson 
6416c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
6426c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
643d0ec9796SRichard Henderson     }
644d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
645d0ec9796SRichard Henderson }
646d0ec9796SRichard Henderson 
647d0ec9796SRichard Henderson void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
648d0ec9796SRichard Henderson {
649d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
650d0ec9796SRichard Henderson     int shift = simd_data(desc);
651d0ec9796SRichard Henderson     intptr_t i;
652d0ec9796SRichard Henderson 
6536c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
6546c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
655d0ec9796SRichard Henderson     }
656d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
657d0ec9796SRichard Henderson }
658d0ec9796SRichard Henderson 
659d0ec9796SRichard Henderson void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
660d0ec9796SRichard Henderson {
661d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
662d0ec9796SRichard Henderson     int shift = simd_data(desc);
663d0ec9796SRichard Henderson     intptr_t i;
664d0ec9796SRichard Henderson 
6656c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
6666c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
667d0ec9796SRichard Henderson     }
668d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
669d0ec9796SRichard Henderson }
670d0ec9796SRichard Henderson 
671d0ec9796SRichard Henderson void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
672d0ec9796SRichard Henderson {
673d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
674d0ec9796SRichard Henderson     int shift = simd_data(desc);
675d0ec9796SRichard Henderson     intptr_t i;
676d0ec9796SRichard Henderson 
6776c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
6786c7ab301SRichard Henderson         *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
679d0ec9796SRichard Henderson     }
680d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
681d0ec9796SRichard Henderson }
682d0ec9796SRichard Henderson 
683d0ec9796SRichard Henderson void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
684d0ec9796SRichard Henderson {
685d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
686d0ec9796SRichard Henderson     int shift = simd_data(desc);
687d0ec9796SRichard Henderson     intptr_t i;
688d0ec9796SRichard Henderson 
6896c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
6906c7ab301SRichard Henderson         *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
691d0ec9796SRichard Henderson     }
692d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
693d0ec9796SRichard Henderson }
694d0ec9796SRichard Henderson 
695d0ec9796SRichard Henderson void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
696d0ec9796SRichard Henderson {
697d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
698d0ec9796SRichard Henderson     int shift = simd_data(desc);
699d0ec9796SRichard Henderson     intptr_t i;
700d0ec9796SRichard Henderson 
7016c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
7026c7ab301SRichard Henderson         *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
703d0ec9796SRichard Henderson     }
704d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
705d0ec9796SRichard Henderson }
706d0ec9796SRichard Henderson 
707d0ec9796SRichard Henderson void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
708d0ec9796SRichard Henderson {
709d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
710d0ec9796SRichard Henderson     int shift = simd_data(desc);
711d0ec9796SRichard Henderson     intptr_t i;
712d0ec9796SRichard Henderson 
7136c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
7146c7ab301SRichard Henderson         *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
715d0ec9796SRichard Henderson     }
716d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
717d0ec9796SRichard Henderson }
718212be173SRichard Henderson 
719*b0f7e744SRichard Henderson void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
720*b0f7e744SRichard Henderson {
721*b0f7e744SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
722*b0f7e744SRichard Henderson     int shift = simd_data(desc);
723*b0f7e744SRichard Henderson     intptr_t i;
724*b0f7e744SRichard Henderson 
725*b0f7e744SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
726*b0f7e744SRichard Henderson         *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
727*b0f7e744SRichard Henderson     }
728*b0f7e744SRichard Henderson     clear_high(d, oprsz, desc);
729*b0f7e744SRichard Henderson }
730*b0f7e744SRichard Henderson 
731*b0f7e744SRichard Henderson void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
732*b0f7e744SRichard Henderson {
733*b0f7e744SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
734*b0f7e744SRichard Henderson     int shift = simd_data(desc);
735*b0f7e744SRichard Henderson     intptr_t i;
736*b0f7e744SRichard Henderson 
737*b0f7e744SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
738*b0f7e744SRichard Henderson         *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
739*b0f7e744SRichard Henderson     }
740*b0f7e744SRichard Henderson     clear_high(d, oprsz, desc);
741*b0f7e744SRichard Henderson }
742*b0f7e744SRichard Henderson 
743*b0f7e744SRichard Henderson void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
744*b0f7e744SRichard Henderson {
745*b0f7e744SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
746*b0f7e744SRichard Henderson     int shift = simd_data(desc);
747*b0f7e744SRichard Henderson     intptr_t i;
748*b0f7e744SRichard Henderson 
749*b0f7e744SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
750*b0f7e744SRichard Henderson         *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
751*b0f7e744SRichard Henderson     }
752*b0f7e744SRichard Henderson     clear_high(d, oprsz, desc);
753*b0f7e744SRichard Henderson }
754*b0f7e744SRichard Henderson 
755*b0f7e744SRichard Henderson void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
756*b0f7e744SRichard Henderson {
757*b0f7e744SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
758*b0f7e744SRichard Henderson     int shift = simd_data(desc);
759*b0f7e744SRichard Henderson     intptr_t i;
760*b0f7e744SRichard Henderson 
761*b0f7e744SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
762*b0f7e744SRichard Henderson         *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
763*b0f7e744SRichard Henderson     }
764*b0f7e744SRichard Henderson     clear_high(d, oprsz, desc);
765*b0f7e744SRichard Henderson }
766*b0f7e744SRichard Henderson 
7675ee5c14cSRichard Henderson void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
7685ee5c14cSRichard Henderson {
7695ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
7705ee5c14cSRichard Henderson     intptr_t i;
7715ee5c14cSRichard Henderson 
7725ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
7735ee5c14cSRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
7745ee5c14cSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
7755ee5c14cSRichard Henderson     }
7765ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
7775ee5c14cSRichard Henderson }
7785ee5c14cSRichard Henderson 
7795ee5c14cSRichard Henderson void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
7805ee5c14cSRichard Henderson {
7815ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
7825ee5c14cSRichard Henderson     intptr_t i;
7835ee5c14cSRichard Henderson 
7845ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
7855ee5c14cSRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
7865ee5c14cSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
7875ee5c14cSRichard Henderson     }
7885ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
7895ee5c14cSRichard Henderson }
7905ee5c14cSRichard Henderson 
7915ee5c14cSRichard Henderson void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
7925ee5c14cSRichard Henderson {
7935ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
7945ee5c14cSRichard Henderson     intptr_t i;
7955ee5c14cSRichard Henderson 
7965ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
7975ee5c14cSRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
7985ee5c14cSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
7995ee5c14cSRichard Henderson     }
8005ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8015ee5c14cSRichard Henderson }
8025ee5c14cSRichard Henderson 
8035ee5c14cSRichard Henderson void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
8045ee5c14cSRichard Henderson {
8055ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8065ee5c14cSRichard Henderson     intptr_t i;
8075ee5c14cSRichard Henderson 
8085ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
8095ee5c14cSRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
8105ee5c14cSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
8115ee5c14cSRichard Henderson     }
8125ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8135ee5c14cSRichard Henderson }
8145ee5c14cSRichard Henderson 
8155ee5c14cSRichard Henderson void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
8165ee5c14cSRichard Henderson {
8175ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8185ee5c14cSRichard Henderson     intptr_t i;
8195ee5c14cSRichard Henderson 
8205ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
8215ee5c14cSRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
8225ee5c14cSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
8235ee5c14cSRichard Henderson     }
8245ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8255ee5c14cSRichard Henderson }
8265ee5c14cSRichard Henderson 
8275ee5c14cSRichard Henderson void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
8285ee5c14cSRichard Henderson {
8295ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8305ee5c14cSRichard Henderson     intptr_t i;
8315ee5c14cSRichard Henderson 
8325ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
8335ee5c14cSRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
8345ee5c14cSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
8355ee5c14cSRichard Henderson     }
8365ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8375ee5c14cSRichard Henderson }
8385ee5c14cSRichard Henderson 
8395ee5c14cSRichard Henderson void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
8405ee5c14cSRichard Henderson {
8415ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8425ee5c14cSRichard Henderson     intptr_t i;
8435ee5c14cSRichard Henderson 
8445ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
8455ee5c14cSRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
8465ee5c14cSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
8475ee5c14cSRichard Henderson     }
8485ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8495ee5c14cSRichard Henderson }
8505ee5c14cSRichard Henderson 
8515ee5c14cSRichard Henderson void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
8525ee5c14cSRichard Henderson {
8535ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8545ee5c14cSRichard Henderson     intptr_t i;
8555ee5c14cSRichard Henderson 
8565ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
8575ee5c14cSRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
8585ee5c14cSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
8595ee5c14cSRichard Henderson     }
8605ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8615ee5c14cSRichard Henderson }
8625ee5c14cSRichard Henderson 
8635ee5c14cSRichard Henderson void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
8645ee5c14cSRichard Henderson {
8655ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8665ee5c14cSRichard Henderson     intptr_t i;
8675ee5c14cSRichard Henderson 
868899f08adSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
8695ee5c14cSRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
8705ee5c14cSRichard Henderson         *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
8715ee5c14cSRichard Henderson     }
8725ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8735ee5c14cSRichard Henderson }
8745ee5c14cSRichard Henderson 
8755ee5c14cSRichard Henderson void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
8765ee5c14cSRichard Henderson {
8775ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8785ee5c14cSRichard Henderson     intptr_t i;
8795ee5c14cSRichard Henderson 
8805ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
8815ee5c14cSRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
8825ee5c14cSRichard Henderson         *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
8835ee5c14cSRichard Henderson     }
8845ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8855ee5c14cSRichard Henderson }
8865ee5c14cSRichard Henderson 
8875ee5c14cSRichard Henderson void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
8885ee5c14cSRichard Henderson {
8895ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8905ee5c14cSRichard Henderson     intptr_t i;
8915ee5c14cSRichard Henderson 
892899f08adSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
8935ee5c14cSRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
8945ee5c14cSRichard Henderson         *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
8955ee5c14cSRichard Henderson     }
8965ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8975ee5c14cSRichard Henderson }
8985ee5c14cSRichard Henderson 
8995ee5c14cSRichard Henderson void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
9005ee5c14cSRichard Henderson {
9015ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
9025ee5c14cSRichard Henderson     intptr_t i;
9035ee5c14cSRichard Henderson 
904899f08adSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
9055ee5c14cSRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
9065ee5c14cSRichard Henderson         *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
9075ee5c14cSRichard Henderson     }
9085ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
9095ee5c14cSRichard Henderson }
9105ee5c14cSRichard Henderson 
911212be173SRichard Henderson #define DO_CMP1(NAME, TYPE, OP)                                            \
912212be173SRichard Henderson void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
913212be173SRichard Henderson {                                                                          \
914212be173SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);                                     \
915212be173SRichard Henderson     intptr_t i;                                                            \
9166cb1d3b8SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(TYPE)) {                            \
9170270bd50SRichard Henderson         *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i));        \
918212be173SRichard Henderson     }                                                                      \
919212be173SRichard Henderson     clear_high(d, oprsz, desc);                                            \
920212be173SRichard Henderson }
921212be173SRichard Henderson 
922212be173SRichard Henderson #define DO_CMP2(SZ) \
9236c7ab301SRichard Henderson     DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==)    \
9246c7ab301SRichard Henderson     DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=)    \
9256c7ab301SRichard Henderson     DO_CMP1(gvec_lt##SZ, int##SZ##_t, <)      \
9266c7ab301SRichard Henderson     DO_CMP1(gvec_le##SZ, int##SZ##_t, <=)     \
9276c7ab301SRichard Henderson     DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <)    \
9286c7ab301SRichard Henderson     DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
929212be173SRichard Henderson 
930212be173SRichard Henderson DO_CMP2(8)
931212be173SRichard Henderson DO_CMP2(16)
932212be173SRichard Henderson DO_CMP2(32)
933212be173SRichard Henderson DO_CMP2(64)
934212be173SRichard Henderson 
935212be173SRichard Henderson #undef DO_CMP1
936212be173SRichard Henderson #undef DO_CMP2
937f49b12c6SRichard Henderson 
938f49b12c6SRichard Henderson void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
939f49b12c6SRichard Henderson {
940f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
941f49b12c6SRichard Henderson     intptr_t i;
942f49b12c6SRichard Henderson 
943f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
944f49b12c6SRichard Henderson         int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
945f49b12c6SRichard Henderson         if (r > INT8_MAX) {
946f49b12c6SRichard Henderson             r = INT8_MAX;
947f49b12c6SRichard Henderson         } else if (r < INT8_MIN) {
948f49b12c6SRichard Henderson             r = INT8_MIN;
949f49b12c6SRichard Henderson         }
950f49b12c6SRichard Henderson         *(int8_t *)(d + i) = r;
951f49b12c6SRichard Henderson     }
952f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
953f49b12c6SRichard Henderson }
954f49b12c6SRichard Henderson 
955f49b12c6SRichard Henderson void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
956f49b12c6SRichard Henderson {
957f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
958f49b12c6SRichard Henderson     intptr_t i;
959f49b12c6SRichard Henderson 
960f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
961f49b12c6SRichard Henderson         int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
962f49b12c6SRichard Henderson         if (r > INT16_MAX) {
963f49b12c6SRichard Henderson             r = INT16_MAX;
964f49b12c6SRichard Henderson         } else if (r < INT16_MIN) {
965f49b12c6SRichard Henderson             r = INT16_MIN;
966f49b12c6SRichard Henderson         }
967f49b12c6SRichard Henderson         *(int16_t *)(d + i) = r;
968f49b12c6SRichard Henderson     }
969f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
970f49b12c6SRichard Henderson }
971f49b12c6SRichard Henderson 
972f49b12c6SRichard Henderson void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
973f49b12c6SRichard Henderson {
974f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
975f49b12c6SRichard Henderson     intptr_t i;
976f49b12c6SRichard Henderson 
977f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
978f49b12c6SRichard Henderson         int32_t ai = *(int32_t *)(a + i);
979f49b12c6SRichard Henderson         int32_t bi = *(int32_t *)(b + i);
980f49b12c6SRichard Henderson         int32_t di = ai + bi;
981f49b12c6SRichard Henderson         if (((di ^ ai) &~ (ai ^ bi)) < 0) {
982f49b12c6SRichard Henderson             /* Signed overflow.  */
983f49b12c6SRichard Henderson             di = (di < 0 ? INT32_MAX : INT32_MIN);
984f49b12c6SRichard Henderson         }
985f49b12c6SRichard Henderson         *(int32_t *)(d + i) = di;
986f49b12c6SRichard Henderson     }
987f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
988f49b12c6SRichard Henderson }
989f49b12c6SRichard Henderson 
990f49b12c6SRichard Henderson void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
991f49b12c6SRichard Henderson {
992f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
993f49b12c6SRichard Henderson     intptr_t i;
994f49b12c6SRichard Henderson 
995f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
996f49b12c6SRichard Henderson         int64_t ai = *(int64_t *)(a + i);
997f49b12c6SRichard Henderson         int64_t bi = *(int64_t *)(b + i);
998f49b12c6SRichard Henderson         int64_t di = ai + bi;
999f49b12c6SRichard Henderson         if (((di ^ ai) &~ (ai ^ bi)) < 0) {
1000f49b12c6SRichard Henderson             /* Signed overflow.  */
1001f49b12c6SRichard Henderson             di = (di < 0 ? INT64_MAX : INT64_MIN);
1002f49b12c6SRichard Henderson         }
1003f49b12c6SRichard Henderson         *(int64_t *)(d + i) = di;
1004f49b12c6SRichard Henderson     }
1005f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1006f49b12c6SRichard Henderson }
1007f49b12c6SRichard Henderson 
1008f49b12c6SRichard Henderson void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
1009f49b12c6SRichard Henderson {
1010f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1011f49b12c6SRichard Henderson     intptr_t i;
1012f49b12c6SRichard Henderson 
1013f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1014f49b12c6SRichard Henderson         int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
1015f49b12c6SRichard Henderson         if (r > INT8_MAX) {
1016f49b12c6SRichard Henderson             r = INT8_MAX;
1017f49b12c6SRichard Henderson         } else if (r < INT8_MIN) {
1018f49b12c6SRichard Henderson             r = INT8_MIN;
1019f49b12c6SRichard Henderson         }
1020f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
1021f49b12c6SRichard Henderson     }
1022f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1023f49b12c6SRichard Henderson }
1024f49b12c6SRichard Henderson 
1025f49b12c6SRichard Henderson void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
1026f49b12c6SRichard Henderson {
1027f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1028f49b12c6SRichard Henderson     intptr_t i;
1029f49b12c6SRichard Henderson 
1030f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1031f49b12c6SRichard Henderson         int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
1032f49b12c6SRichard Henderson         if (r > INT16_MAX) {
1033f49b12c6SRichard Henderson             r = INT16_MAX;
1034f49b12c6SRichard Henderson         } else if (r < INT16_MIN) {
1035f49b12c6SRichard Henderson             r = INT16_MIN;
1036f49b12c6SRichard Henderson         }
1037f49b12c6SRichard Henderson         *(int16_t *)(d + i) = r;
1038f49b12c6SRichard Henderson     }
1039f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1040f49b12c6SRichard Henderson }
1041f49b12c6SRichard Henderson 
1042f49b12c6SRichard Henderson void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
1043f49b12c6SRichard Henderson {
1044f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1045f49b12c6SRichard Henderson     intptr_t i;
1046f49b12c6SRichard Henderson 
1047f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1048f49b12c6SRichard Henderson         int32_t ai = *(int32_t *)(a + i);
1049f49b12c6SRichard Henderson         int32_t bi = *(int32_t *)(b + i);
1050f49b12c6SRichard Henderson         int32_t di = ai - bi;
1051f49b12c6SRichard Henderson         if (((di ^ ai) & (ai ^ bi)) < 0) {
1052f49b12c6SRichard Henderson             /* Signed overflow.  */
1053f49b12c6SRichard Henderson             di = (di < 0 ? INT32_MAX : INT32_MIN);
1054f49b12c6SRichard Henderson         }
1055f49b12c6SRichard Henderson         *(int32_t *)(d + i) = di;
1056f49b12c6SRichard Henderson     }
1057f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1058f49b12c6SRichard Henderson }
1059f49b12c6SRichard Henderson 
1060f49b12c6SRichard Henderson void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
1061f49b12c6SRichard Henderson {
1062f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1063f49b12c6SRichard Henderson     intptr_t i;
1064f49b12c6SRichard Henderson 
1065f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1066f49b12c6SRichard Henderson         int64_t ai = *(int64_t *)(a + i);
1067f49b12c6SRichard Henderson         int64_t bi = *(int64_t *)(b + i);
1068f49b12c6SRichard Henderson         int64_t di = ai - bi;
1069f49b12c6SRichard Henderson         if (((di ^ ai) & (ai ^ bi)) < 0) {
1070f49b12c6SRichard Henderson             /* Signed overflow.  */
1071f49b12c6SRichard Henderson             di = (di < 0 ? INT64_MAX : INT64_MIN);
1072f49b12c6SRichard Henderson         }
1073f49b12c6SRichard Henderson         *(int64_t *)(d + i) = di;
1074f49b12c6SRichard Henderson     }
1075f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1076f49b12c6SRichard Henderson }
1077f49b12c6SRichard Henderson 
1078f49b12c6SRichard Henderson void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
1079f49b12c6SRichard Henderson {
1080f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1081f49b12c6SRichard Henderson     intptr_t i;
1082f49b12c6SRichard Henderson 
1083f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1084f49b12c6SRichard Henderson         unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
1085f49b12c6SRichard Henderson         if (r > UINT8_MAX) {
1086f49b12c6SRichard Henderson             r = UINT8_MAX;
1087f49b12c6SRichard Henderson         }
1088f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
1089f49b12c6SRichard Henderson     }
1090f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1091f49b12c6SRichard Henderson }
1092f49b12c6SRichard Henderson 
1093f49b12c6SRichard Henderson void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
1094f49b12c6SRichard Henderson {
1095f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1096f49b12c6SRichard Henderson     intptr_t i;
1097f49b12c6SRichard Henderson 
1098f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1099f49b12c6SRichard Henderson         unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
1100f49b12c6SRichard Henderson         if (r > UINT16_MAX) {
1101f49b12c6SRichard Henderson             r = UINT16_MAX;
1102f49b12c6SRichard Henderson         }
1103f49b12c6SRichard Henderson         *(uint16_t *)(d + i) = r;
1104f49b12c6SRichard Henderson     }
1105f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1106f49b12c6SRichard Henderson }
1107f49b12c6SRichard Henderson 
1108f49b12c6SRichard Henderson void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
1109f49b12c6SRichard Henderson {
1110f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1111f49b12c6SRichard Henderson     intptr_t i;
1112f49b12c6SRichard Henderson 
1113f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1114f49b12c6SRichard Henderson         uint32_t ai = *(uint32_t *)(a + i);
1115f49b12c6SRichard Henderson         uint32_t bi = *(uint32_t *)(b + i);
1116f49b12c6SRichard Henderson         uint32_t di = ai + bi;
1117f49b12c6SRichard Henderson         if (di < ai) {
1118f49b12c6SRichard Henderson             di = UINT32_MAX;
1119f49b12c6SRichard Henderson         }
1120f49b12c6SRichard Henderson         *(uint32_t *)(d + i) = di;
1121f49b12c6SRichard Henderson     }
1122f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1123f49b12c6SRichard Henderson }
1124f49b12c6SRichard Henderson 
1125f49b12c6SRichard Henderson void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
1126f49b12c6SRichard Henderson {
1127f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1128f49b12c6SRichard Henderson     intptr_t i;
1129f49b12c6SRichard Henderson 
1130f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1131f49b12c6SRichard Henderson         uint64_t ai = *(uint64_t *)(a + i);
1132f49b12c6SRichard Henderson         uint64_t bi = *(uint64_t *)(b + i);
1133f49b12c6SRichard Henderson         uint64_t di = ai + bi;
1134f49b12c6SRichard Henderson         if (di < ai) {
1135f49b12c6SRichard Henderson             di = UINT64_MAX;
1136f49b12c6SRichard Henderson         }
1137f49b12c6SRichard Henderson         *(uint64_t *)(d + i) = di;
1138f49b12c6SRichard Henderson     }
1139f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1140f49b12c6SRichard Henderson }
1141f49b12c6SRichard Henderson 
1142f49b12c6SRichard Henderson void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
1143f49b12c6SRichard Henderson {
1144f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1145f49b12c6SRichard Henderson     intptr_t i;
1146f49b12c6SRichard Henderson 
1147f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1148f49b12c6SRichard Henderson         int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
1149f49b12c6SRichard Henderson         if (r < 0) {
1150f49b12c6SRichard Henderson             r = 0;
1151f49b12c6SRichard Henderson         }
1152f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
1153f49b12c6SRichard Henderson     }
1154f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1155f49b12c6SRichard Henderson }
1156f49b12c6SRichard Henderson 
1157f49b12c6SRichard Henderson void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
1158f49b12c6SRichard Henderson {
1159f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1160f49b12c6SRichard Henderson     intptr_t i;
1161f49b12c6SRichard Henderson 
1162f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1163f49b12c6SRichard Henderson         int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
1164f49b12c6SRichard Henderson         if (r < 0) {
1165f49b12c6SRichard Henderson             r = 0;
1166f49b12c6SRichard Henderson         }
1167f49b12c6SRichard Henderson         *(uint16_t *)(d + i) = r;
1168f49b12c6SRichard Henderson     }
1169f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1170f49b12c6SRichard Henderson }
1171f49b12c6SRichard Henderson 
1172f49b12c6SRichard Henderson void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
1173f49b12c6SRichard Henderson {
1174f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1175f49b12c6SRichard Henderson     intptr_t i;
1176f49b12c6SRichard Henderson 
1177f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1178f49b12c6SRichard Henderson         uint32_t ai = *(uint32_t *)(a + i);
1179f49b12c6SRichard Henderson         uint32_t bi = *(uint32_t *)(b + i);
1180f49b12c6SRichard Henderson         uint32_t di = ai - bi;
1181f49b12c6SRichard Henderson         if (ai < bi) {
1182f49b12c6SRichard Henderson             di = 0;
1183f49b12c6SRichard Henderson         }
1184f49b12c6SRichard Henderson         *(uint32_t *)(d + i) = di;
1185f49b12c6SRichard Henderson     }
1186f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1187f49b12c6SRichard Henderson }
1188f49b12c6SRichard Henderson 
1189f49b12c6SRichard Henderson void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1190f49b12c6SRichard Henderson {
1191f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1192f49b12c6SRichard Henderson     intptr_t i;
1193f49b12c6SRichard Henderson 
1194f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1195f49b12c6SRichard Henderson         uint64_t ai = *(uint64_t *)(a + i);
1196f49b12c6SRichard Henderson         uint64_t bi = *(uint64_t *)(b + i);
1197f49b12c6SRichard Henderson         uint64_t di = ai - bi;
1198f49b12c6SRichard Henderson         if (ai < bi) {
1199f49b12c6SRichard Henderson             di = 0;
1200f49b12c6SRichard Henderson         }
1201f49b12c6SRichard Henderson         *(uint64_t *)(d + i) = di;
1202f49b12c6SRichard Henderson     }
1203f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1204f49b12c6SRichard Henderson }
1205dd0a0fcdSRichard Henderson 
1206dd0a0fcdSRichard Henderson void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1207dd0a0fcdSRichard Henderson {
1208dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1209dd0a0fcdSRichard Henderson     intptr_t i;
1210dd0a0fcdSRichard Henderson 
1211dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1212dd0a0fcdSRichard Henderson         int8_t aa = *(int8_t *)(a + i);
1213dd0a0fcdSRichard Henderson         int8_t bb = *(int8_t *)(b + i);
1214dd0a0fcdSRichard Henderson         int8_t dd = aa < bb ? aa : bb;
1215dd0a0fcdSRichard Henderson         *(int8_t *)(d + i) = dd;
1216dd0a0fcdSRichard Henderson     }
1217dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1218dd0a0fcdSRichard Henderson }
1219dd0a0fcdSRichard Henderson 
1220dd0a0fcdSRichard Henderson void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1221dd0a0fcdSRichard Henderson {
1222dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1223dd0a0fcdSRichard Henderson     intptr_t i;
1224dd0a0fcdSRichard Henderson 
1225dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1226dd0a0fcdSRichard Henderson         int16_t aa = *(int16_t *)(a + i);
1227dd0a0fcdSRichard Henderson         int16_t bb = *(int16_t *)(b + i);
1228dd0a0fcdSRichard Henderson         int16_t dd = aa < bb ? aa : bb;
1229dd0a0fcdSRichard Henderson         *(int16_t *)(d + i) = dd;
1230dd0a0fcdSRichard Henderson     }
1231dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1232dd0a0fcdSRichard Henderson }
1233dd0a0fcdSRichard Henderson 
1234dd0a0fcdSRichard Henderson void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1235dd0a0fcdSRichard Henderson {
1236dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1237dd0a0fcdSRichard Henderson     intptr_t i;
1238dd0a0fcdSRichard Henderson 
1239dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1240dd0a0fcdSRichard Henderson         int32_t aa = *(int32_t *)(a + i);
1241dd0a0fcdSRichard Henderson         int32_t bb = *(int32_t *)(b + i);
1242dd0a0fcdSRichard Henderson         int32_t dd = aa < bb ? aa : bb;
1243dd0a0fcdSRichard Henderson         *(int32_t *)(d + i) = dd;
1244dd0a0fcdSRichard Henderson     }
1245dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1246dd0a0fcdSRichard Henderson }
1247dd0a0fcdSRichard Henderson 
1248dd0a0fcdSRichard Henderson void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1249dd0a0fcdSRichard Henderson {
1250dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1251dd0a0fcdSRichard Henderson     intptr_t i;
1252dd0a0fcdSRichard Henderson 
1253dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1254dd0a0fcdSRichard Henderson         int64_t aa = *(int64_t *)(a + i);
1255dd0a0fcdSRichard Henderson         int64_t bb = *(int64_t *)(b + i);
1256dd0a0fcdSRichard Henderson         int64_t dd = aa < bb ? aa : bb;
1257dd0a0fcdSRichard Henderson         *(int64_t *)(d + i) = dd;
1258dd0a0fcdSRichard Henderson     }
1259dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1260dd0a0fcdSRichard Henderson }
1261dd0a0fcdSRichard Henderson 
1262dd0a0fcdSRichard Henderson void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1263dd0a0fcdSRichard Henderson {
1264dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1265dd0a0fcdSRichard Henderson     intptr_t i;
1266dd0a0fcdSRichard Henderson 
1267dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1268dd0a0fcdSRichard Henderson         int8_t aa = *(int8_t *)(a + i);
1269dd0a0fcdSRichard Henderson         int8_t bb = *(int8_t *)(b + i);
1270dd0a0fcdSRichard Henderson         int8_t dd = aa > bb ? aa : bb;
1271dd0a0fcdSRichard Henderson         *(int8_t *)(d + i) = dd;
1272dd0a0fcdSRichard Henderson     }
1273dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1274dd0a0fcdSRichard Henderson }
1275dd0a0fcdSRichard Henderson 
1276dd0a0fcdSRichard Henderson void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1277dd0a0fcdSRichard Henderson {
1278dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1279dd0a0fcdSRichard Henderson     intptr_t i;
1280dd0a0fcdSRichard Henderson 
1281dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1282dd0a0fcdSRichard Henderson         int16_t aa = *(int16_t *)(a + i);
1283dd0a0fcdSRichard Henderson         int16_t bb = *(int16_t *)(b + i);
1284dd0a0fcdSRichard Henderson         int16_t dd = aa > bb ? aa : bb;
1285dd0a0fcdSRichard Henderson         *(int16_t *)(d + i) = dd;
1286dd0a0fcdSRichard Henderson     }
1287dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1288dd0a0fcdSRichard Henderson }
1289dd0a0fcdSRichard Henderson 
1290dd0a0fcdSRichard Henderson void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1291dd0a0fcdSRichard Henderson {
1292dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1293dd0a0fcdSRichard Henderson     intptr_t i;
1294dd0a0fcdSRichard Henderson 
1295dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1296dd0a0fcdSRichard Henderson         int32_t aa = *(int32_t *)(a + i);
1297dd0a0fcdSRichard Henderson         int32_t bb = *(int32_t *)(b + i);
1298dd0a0fcdSRichard Henderson         int32_t dd = aa > bb ? aa : bb;
1299dd0a0fcdSRichard Henderson         *(int32_t *)(d + i) = dd;
1300dd0a0fcdSRichard Henderson     }
1301dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1302dd0a0fcdSRichard Henderson }
1303dd0a0fcdSRichard Henderson 
1304dd0a0fcdSRichard Henderson void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1305dd0a0fcdSRichard Henderson {
1306dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1307dd0a0fcdSRichard Henderson     intptr_t i;
1308dd0a0fcdSRichard Henderson 
1309dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1310dd0a0fcdSRichard Henderson         int64_t aa = *(int64_t *)(a + i);
1311dd0a0fcdSRichard Henderson         int64_t bb = *(int64_t *)(b + i);
1312dd0a0fcdSRichard Henderson         int64_t dd = aa > bb ? aa : bb;
1313dd0a0fcdSRichard Henderson         *(int64_t *)(d + i) = dd;
1314dd0a0fcdSRichard Henderson     }
1315dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1316dd0a0fcdSRichard Henderson }
1317dd0a0fcdSRichard Henderson 
1318dd0a0fcdSRichard Henderson void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1319dd0a0fcdSRichard Henderson {
1320dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1321dd0a0fcdSRichard Henderson     intptr_t i;
1322dd0a0fcdSRichard Henderson 
1323dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1324dd0a0fcdSRichard Henderson         uint8_t aa = *(uint8_t *)(a + i);
1325dd0a0fcdSRichard Henderson         uint8_t bb = *(uint8_t *)(b + i);
1326dd0a0fcdSRichard Henderson         uint8_t dd = aa < bb ? aa : bb;
1327dd0a0fcdSRichard Henderson         *(uint8_t *)(d + i) = dd;
1328dd0a0fcdSRichard Henderson     }
1329dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1330dd0a0fcdSRichard Henderson }
1331dd0a0fcdSRichard Henderson 
1332dd0a0fcdSRichard Henderson void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1333dd0a0fcdSRichard Henderson {
1334dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1335dd0a0fcdSRichard Henderson     intptr_t i;
1336dd0a0fcdSRichard Henderson 
1337dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1338dd0a0fcdSRichard Henderson         uint16_t aa = *(uint16_t *)(a + i);
1339dd0a0fcdSRichard Henderson         uint16_t bb = *(uint16_t *)(b + i);
1340dd0a0fcdSRichard Henderson         uint16_t dd = aa < bb ? aa : bb;
1341dd0a0fcdSRichard Henderson         *(uint16_t *)(d + i) = dd;
1342dd0a0fcdSRichard Henderson     }
1343dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1344dd0a0fcdSRichard Henderson }
1345dd0a0fcdSRichard Henderson 
1346dd0a0fcdSRichard Henderson void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1347dd0a0fcdSRichard Henderson {
1348dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1349dd0a0fcdSRichard Henderson     intptr_t i;
1350dd0a0fcdSRichard Henderson 
1351dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1352dd0a0fcdSRichard Henderson         uint32_t aa = *(uint32_t *)(a + i);
1353dd0a0fcdSRichard Henderson         uint32_t bb = *(uint32_t *)(b + i);
1354dd0a0fcdSRichard Henderson         uint32_t dd = aa < bb ? aa : bb;
1355dd0a0fcdSRichard Henderson         *(uint32_t *)(d + i) = dd;
1356dd0a0fcdSRichard Henderson     }
1357dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1358dd0a0fcdSRichard Henderson }
1359dd0a0fcdSRichard Henderson 
1360dd0a0fcdSRichard Henderson void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1361dd0a0fcdSRichard Henderson {
1362dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1363dd0a0fcdSRichard Henderson     intptr_t i;
1364dd0a0fcdSRichard Henderson 
1365dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1366dd0a0fcdSRichard Henderson         uint64_t aa = *(uint64_t *)(a + i);
1367dd0a0fcdSRichard Henderson         uint64_t bb = *(uint64_t *)(b + i);
1368dd0a0fcdSRichard Henderson         uint64_t dd = aa < bb ? aa : bb;
1369dd0a0fcdSRichard Henderson         *(uint64_t *)(d + i) = dd;
1370dd0a0fcdSRichard Henderson     }
1371dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1372dd0a0fcdSRichard Henderson }
1373dd0a0fcdSRichard Henderson 
1374dd0a0fcdSRichard Henderson void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1375dd0a0fcdSRichard Henderson {
1376dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1377dd0a0fcdSRichard Henderson     intptr_t i;
1378dd0a0fcdSRichard Henderson 
1379dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1380dd0a0fcdSRichard Henderson         uint8_t aa = *(uint8_t *)(a + i);
1381dd0a0fcdSRichard Henderson         uint8_t bb = *(uint8_t *)(b + i);
1382dd0a0fcdSRichard Henderson         uint8_t dd = aa > bb ? aa : bb;
1383dd0a0fcdSRichard Henderson         *(uint8_t *)(d + i) = dd;
1384dd0a0fcdSRichard Henderson     }
1385dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1386dd0a0fcdSRichard Henderson }
1387dd0a0fcdSRichard Henderson 
1388dd0a0fcdSRichard Henderson void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1389dd0a0fcdSRichard Henderson {
1390dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1391dd0a0fcdSRichard Henderson     intptr_t i;
1392dd0a0fcdSRichard Henderson 
1393dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1394dd0a0fcdSRichard Henderson         uint16_t aa = *(uint16_t *)(a + i);
1395dd0a0fcdSRichard Henderson         uint16_t bb = *(uint16_t *)(b + i);
1396dd0a0fcdSRichard Henderson         uint16_t dd = aa > bb ? aa : bb;
1397dd0a0fcdSRichard Henderson         *(uint16_t *)(d + i) = dd;
1398dd0a0fcdSRichard Henderson     }
1399dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1400dd0a0fcdSRichard Henderson }
1401dd0a0fcdSRichard Henderson 
1402dd0a0fcdSRichard Henderson void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1403dd0a0fcdSRichard Henderson {
1404dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1405dd0a0fcdSRichard Henderson     intptr_t i;
1406dd0a0fcdSRichard Henderson 
1407dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1408dd0a0fcdSRichard Henderson         uint32_t aa = *(uint32_t *)(a + i);
1409dd0a0fcdSRichard Henderson         uint32_t bb = *(uint32_t *)(b + i);
1410dd0a0fcdSRichard Henderson         uint32_t dd = aa > bb ? aa : bb;
1411dd0a0fcdSRichard Henderson         *(uint32_t *)(d + i) = dd;
1412dd0a0fcdSRichard Henderson     }
1413dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1414dd0a0fcdSRichard Henderson }
1415dd0a0fcdSRichard Henderson 
1416dd0a0fcdSRichard Henderson void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1417dd0a0fcdSRichard Henderson {
1418dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1419dd0a0fcdSRichard Henderson     intptr_t i;
1420dd0a0fcdSRichard Henderson 
1421dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1422dd0a0fcdSRichard Henderson         uint64_t aa = *(uint64_t *)(a + i);
1423dd0a0fcdSRichard Henderson         uint64_t bb = *(uint64_t *)(b + i);
1424dd0a0fcdSRichard Henderson         uint64_t dd = aa > bb ? aa : bb;
1425dd0a0fcdSRichard Henderson         *(uint64_t *)(d + i) = dd;
1426dd0a0fcdSRichard Henderson     }
1427dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1428dd0a0fcdSRichard Henderson }
142938dc1294SRichard Henderson 
143038dc1294SRichard Henderson void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
143138dc1294SRichard Henderson {
143238dc1294SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
143338dc1294SRichard Henderson     intptr_t i;
143438dc1294SRichard Henderson 
14356c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
14366c7ab301SRichard Henderson         uint64_t aa = *(uint64_t *)(a + i);
14376c7ab301SRichard Henderson         uint64_t bb = *(uint64_t *)(b + i);
14386c7ab301SRichard Henderson         uint64_t cc = *(uint64_t *)(c + i);
14396c7ab301SRichard Henderson         *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
144038dc1294SRichard Henderson     }
144138dc1294SRichard Henderson     clear_high(d, oprsz, desc);
144238dc1294SRichard Henderson }
1443