1db432672SRichard Henderson /*
2db432672SRichard Henderson * Generic vectorized operation runtime
3db432672SRichard Henderson *
4db432672SRichard Henderson * Copyright (c) 2018 Linaro
5db432672SRichard Henderson *
6db432672SRichard Henderson * This library is free software; you can redistribute it and/or
7db432672SRichard Henderson * modify it under the terms of the GNU Lesser General Public
8db432672SRichard Henderson * License as published by the Free Software Foundation; either
9fb0343d5SThomas Huth * version 2.1 of the License, or (at your option) any later version.
10db432672SRichard Henderson *
11db432672SRichard Henderson * This library is distributed in the hope that it will be useful,
12db432672SRichard Henderson * but WITHOUT ANY WARRANTY; without even the implied warranty of
13db432672SRichard Henderson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14db432672SRichard Henderson * Lesser General Public License for more details.
15db432672SRichard Henderson *
16db432672SRichard Henderson * You should have received a copy of the GNU Lesser General Public
17db432672SRichard Henderson * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18db432672SRichard Henderson */
19db432672SRichard Henderson
20db432672SRichard Henderson #include "qemu/osdep.h"
21db432672SRichard Henderson #include "qemu/host-utils.h"
22c213ee2dSRichard Henderson #include "exec/helper-proto-common.h"
23dcb32f1dSPhilippe Mathieu-Daudé #include "tcg/tcg-gvec-desc.h"
24db432672SRichard Henderson
25db432672SRichard Henderson
/*
 * Zero the tail of the vector at @d: every 64-bit word starting at byte
 * offset @oprsz and below simd_maxsz(@desc).  Nothing is written when the
 * maximum size does not exceed @oprsz.
 *
 * NOTE(review): stores are whole uint64_t words, so this presumably relies
 * on both sizes being multiples of 8 -- confirm against the descriptor
 * encoding.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    const intptr_t limit = simd_maxsz(desc);

    if (unlikely(oprsz < limit)) {
        intptr_t ofs = oprsz;

        /* The guard above guarantees at least one word must be cleared. */
        do {
            *(uint64_t *)(d + ofs) = 0;
            ofs += sizeof(uint64_t);
        } while (ofs < limit);
    }
}
37db432672SRichard Henderson
HELPER(gvec_add8)38db432672SRichard Henderson void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
39db432672SRichard Henderson {
40db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
41db432672SRichard Henderson intptr_t i;
42db432672SRichard Henderson
436c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
446c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
45db432672SRichard Henderson }
46db432672SRichard Henderson clear_high(d, oprsz, desc);
47db432672SRichard Henderson }
48db432672SRichard Henderson
HELPER(gvec_add16)49db432672SRichard Henderson void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
50db432672SRichard Henderson {
51db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
52db432672SRichard Henderson intptr_t i;
53db432672SRichard Henderson
546c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
556c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
56db432672SRichard Henderson }
57db432672SRichard Henderson clear_high(d, oprsz, desc);
58db432672SRichard Henderson }
59db432672SRichard Henderson
HELPER(gvec_add32)60db432672SRichard Henderson void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
61db432672SRichard Henderson {
62db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
63db432672SRichard Henderson intptr_t i;
64db432672SRichard Henderson
656c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
666c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
67db432672SRichard Henderson }
68db432672SRichard Henderson clear_high(d, oprsz, desc);
69db432672SRichard Henderson }
70db432672SRichard Henderson
HELPER(gvec_add64)71db432672SRichard Henderson void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
72db432672SRichard Henderson {
73db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
74db432672SRichard Henderson intptr_t i;
75db432672SRichard Henderson
766c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
776c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
78db432672SRichard Henderson }
79db432672SRichard Henderson clear_high(d, oprsz, desc);
80db432672SRichard Henderson }
81db432672SRichard Henderson
HELPER(gvec_adds8)8222fc3527SRichard Henderson void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
8322fc3527SRichard Henderson {
8422fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
8522fc3527SRichard Henderson intptr_t i;
8622fc3527SRichard Henderson
876c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
880a83e43aSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b;
8922fc3527SRichard Henderson }
9022fc3527SRichard Henderson clear_high(d, oprsz, desc);
9122fc3527SRichard Henderson }
9222fc3527SRichard Henderson
HELPER(gvec_adds16)9322fc3527SRichard Henderson void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
9422fc3527SRichard Henderson {
9522fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
9622fc3527SRichard Henderson intptr_t i;
9722fc3527SRichard Henderson
986c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
990a83e43aSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b;
10022fc3527SRichard Henderson }
10122fc3527SRichard Henderson clear_high(d, oprsz, desc);
10222fc3527SRichard Henderson }
10322fc3527SRichard Henderson
HELPER(gvec_adds32)10422fc3527SRichard Henderson void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
10522fc3527SRichard Henderson {
10622fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
10722fc3527SRichard Henderson intptr_t i;
10822fc3527SRichard Henderson
1096c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1100a83e43aSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b;
11122fc3527SRichard Henderson }
11222fc3527SRichard Henderson clear_high(d, oprsz, desc);
11322fc3527SRichard Henderson }
11422fc3527SRichard Henderson
HELPER(gvec_adds64)11522fc3527SRichard Henderson void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
11622fc3527SRichard Henderson {
11722fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
11822fc3527SRichard Henderson intptr_t i;
11922fc3527SRichard Henderson
1206c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1210a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b;
12222fc3527SRichard Henderson }
12322fc3527SRichard Henderson clear_high(d, oprsz, desc);
12422fc3527SRichard Henderson }
12522fc3527SRichard Henderson
HELPER(gvec_sub8)126db432672SRichard Henderson void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
127db432672SRichard Henderson {
128db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
129db432672SRichard Henderson intptr_t i;
130db432672SRichard Henderson
1316c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1326c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
133db432672SRichard Henderson }
134db432672SRichard Henderson clear_high(d, oprsz, desc);
135db432672SRichard Henderson }
136db432672SRichard Henderson
HELPER(gvec_sub16)137db432672SRichard Henderson void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
138db432672SRichard Henderson {
139db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
140db432672SRichard Henderson intptr_t i;
141db432672SRichard Henderson
1426c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1436c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
144db432672SRichard Henderson }
145db432672SRichard Henderson clear_high(d, oprsz, desc);
146db432672SRichard Henderson }
147db432672SRichard Henderson
HELPER(gvec_sub32)148db432672SRichard Henderson void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
149db432672SRichard Henderson {
150db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
151db432672SRichard Henderson intptr_t i;
152db432672SRichard Henderson
1536c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1546c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
155db432672SRichard Henderson }
156db432672SRichard Henderson clear_high(d, oprsz, desc);
157db432672SRichard Henderson }
158db432672SRichard Henderson
HELPER(gvec_sub64)159db432672SRichard Henderson void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
160db432672SRichard Henderson {
161db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
162db432672SRichard Henderson intptr_t i;
163db432672SRichard Henderson
1646c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1656c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
166db432672SRichard Henderson }
167db432672SRichard Henderson clear_high(d, oprsz, desc);
168db432672SRichard Henderson }
169db432672SRichard Henderson
HELPER(gvec_subs8)17022fc3527SRichard Henderson void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
17122fc3527SRichard Henderson {
17222fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
17322fc3527SRichard Henderson intptr_t i;
17422fc3527SRichard Henderson
1756c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1760a83e43aSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b;
17722fc3527SRichard Henderson }
17822fc3527SRichard Henderson clear_high(d, oprsz, desc);
17922fc3527SRichard Henderson }
18022fc3527SRichard Henderson
HELPER(gvec_subs16)18122fc3527SRichard Henderson void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
18222fc3527SRichard Henderson {
18322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
18422fc3527SRichard Henderson intptr_t i;
18522fc3527SRichard Henderson
1866c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1870a83e43aSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b;
18822fc3527SRichard Henderson }
18922fc3527SRichard Henderson clear_high(d, oprsz, desc);
19022fc3527SRichard Henderson }
19122fc3527SRichard Henderson
HELPER(gvec_subs32)19222fc3527SRichard Henderson void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
19322fc3527SRichard Henderson {
19422fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
19522fc3527SRichard Henderson intptr_t i;
19622fc3527SRichard Henderson
1976c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1980a83e43aSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b;
19922fc3527SRichard Henderson }
20022fc3527SRichard Henderson clear_high(d, oprsz, desc);
20122fc3527SRichard Henderson }
20222fc3527SRichard Henderson
HELPER(gvec_subs64)20322fc3527SRichard Henderson void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
20422fc3527SRichard Henderson {
20522fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
20622fc3527SRichard Henderson intptr_t i;
20722fc3527SRichard Henderson
2086c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
2090a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b;
21022fc3527SRichard Henderson }
21122fc3527SRichard Henderson clear_high(d, oprsz, desc);
21222fc3527SRichard Henderson }
21322fc3527SRichard Henderson
HELPER(gvec_mul8)2143774030aSRichard Henderson void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
2153774030aSRichard Henderson {
2163774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
2173774030aSRichard Henderson intptr_t i;
2183774030aSRichard Henderson
2196c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
2206c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
2213774030aSRichard Henderson }
2223774030aSRichard Henderson clear_high(d, oprsz, desc);
2233774030aSRichard Henderson }
2243774030aSRichard Henderson
HELPER(gvec_mul16)2253774030aSRichard Henderson void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
2263774030aSRichard Henderson {
2273774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
2283774030aSRichard Henderson intptr_t i;
2293774030aSRichard Henderson
2306c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
2316c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
2323774030aSRichard Henderson }
2333774030aSRichard Henderson clear_high(d, oprsz, desc);
2343774030aSRichard Henderson }
2353774030aSRichard Henderson
HELPER(gvec_mul32)2363774030aSRichard Henderson void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
2373774030aSRichard Henderson {
2383774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
2393774030aSRichard Henderson intptr_t i;
2403774030aSRichard Henderson
2416c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
2426c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
2433774030aSRichard Henderson }
2443774030aSRichard Henderson clear_high(d, oprsz, desc);
2453774030aSRichard Henderson }
2463774030aSRichard Henderson
HELPER(gvec_mul64)2473774030aSRichard Henderson void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
2483774030aSRichard Henderson {
2493774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
2503774030aSRichard Henderson intptr_t i;
2513774030aSRichard Henderson
2526c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
2536c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
2543774030aSRichard Henderson }
2553774030aSRichard Henderson clear_high(d, oprsz, desc);
2563774030aSRichard Henderson }
2573774030aSRichard Henderson
HELPER(gvec_muls8)25822fc3527SRichard Henderson void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
25922fc3527SRichard Henderson {
26022fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
26122fc3527SRichard Henderson intptr_t i;
26222fc3527SRichard Henderson
2636c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
2640a83e43aSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b;
26522fc3527SRichard Henderson }
26622fc3527SRichard Henderson clear_high(d, oprsz, desc);
26722fc3527SRichard Henderson }
26822fc3527SRichard Henderson
HELPER(gvec_muls16)26922fc3527SRichard Henderson void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
27022fc3527SRichard Henderson {
27122fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
27222fc3527SRichard Henderson intptr_t i;
27322fc3527SRichard Henderson
2746c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
2750a83e43aSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b;
27622fc3527SRichard Henderson }
27722fc3527SRichard Henderson clear_high(d, oprsz, desc);
27822fc3527SRichard Henderson }
27922fc3527SRichard Henderson
HELPER(gvec_muls32)28022fc3527SRichard Henderson void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
28122fc3527SRichard Henderson {
28222fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
28322fc3527SRichard Henderson intptr_t i;
28422fc3527SRichard Henderson
2856c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
2860a83e43aSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b;
28722fc3527SRichard Henderson }
28822fc3527SRichard Henderson clear_high(d, oprsz, desc);
28922fc3527SRichard Henderson }
29022fc3527SRichard Henderson
HELPER(gvec_muls64)29122fc3527SRichard Henderson void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
29222fc3527SRichard Henderson {
29322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
29422fc3527SRichard Henderson intptr_t i;
29522fc3527SRichard Henderson
2966c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
2970a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b;
29822fc3527SRichard Henderson }
29922fc3527SRichard Henderson clear_high(d, oprsz, desc);
30022fc3527SRichard Henderson }
30122fc3527SRichard Henderson
HELPER(gvec_neg8)302db432672SRichard Henderson void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
303db432672SRichard Henderson {
304db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
305db432672SRichard Henderson intptr_t i;
306db432672SRichard Henderson
3076c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
3086c7ab301SRichard Henderson *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
309db432672SRichard Henderson }
310db432672SRichard Henderson clear_high(d, oprsz, desc);
311db432672SRichard Henderson }
312db432672SRichard Henderson
HELPER(gvec_neg16)313db432672SRichard Henderson void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
314db432672SRichard Henderson {
315db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
316db432672SRichard Henderson intptr_t i;
317db432672SRichard Henderson
3186c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
3196c7ab301SRichard Henderson *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
320db432672SRichard Henderson }
321db432672SRichard Henderson clear_high(d, oprsz, desc);
322db432672SRichard Henderson }
323db432672SRichard Henderson
HELPER(gvec_neg32)324db432672SRichard Henderson void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
325db432672SRichard Henderson {
326db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
327db432672SRichard Henderson intptr_t i;
328db432672SRichard Henderson
3296c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
3306c7ab301SRichard Henderson *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
331db432672SRichard Henderson }
332db432672SRichard Henderson clear_high(d, oprsz, desc);
333db432672SRichard Henderson }
334db432672SRichard Henderson
HELPER(gvec_neg64)335db432672SRichard Henderson void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
336db432672SRichard Henderson {
337db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
338db432672SRichard Henderson intptr_t i;
339db432672SRichard Henderson
3406c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
3416c7ab301SRichard Henderson *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
342db432672SRichard Henderson }
343db432672SRichard Henderson clear_high(d, oprsz, desc);
344db432672SRichard Henderson }
345db432672SRichard Henderson
HELPER(gvec_abs8)346bcefc902SRichard Henderson void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
347bcefc902SRichard Henderson {
348bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
349bcefc902SRichard Henderson intptr_t i;
350bcefc902SRichard Henderson
351bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) {
352bcefc902SRichard Henderson int8_t aa = *(int8_t *)(a + i);
353bcefc902SRichard Henderson *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
354bcefc902SRichard Henderson }
355bcefc902SRichard Henderson clear_high(d, oprsz, desc);
356bcefc902SRichard Henderson }
357bcefc902SRichard Henderson
HELPER(gvec_abs16)358bcefc902SRichard Henderson void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
359bcefc902SRichard Henderson {
360bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
361bcefc902SRichard Henderson intptr_t i;
362bcefc902SRichard Henderson
363bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) {
364bcefc902SRichard Henderson int16_t aa = *(int16_t *)(a + i);
365bcefc902SRichard Henderson *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
366bcefc902SRichard Henderson }
367bcefc902SRichard Henderson clear_high(d, oprsz, desc);
368bcefc902SRichard Henderson }
369bcefc902SRichard Henderson
HELPER(gvec_abs32)370bcefc902SRichard Henderson void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
371bcefc902SRichard Henderson {
372bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
373bcefc902SRichard Henderson intptr_t i;
374bcefc902SRichard Henderson
375bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) {
376bcefc902SRichard Henderson int32_t aa = *(int32_t *)(a + i);
377bcefc902SRichard Henderson *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
378bcefc902SRichard Henderson }
379bcefc902SRichard Henderson clear_high(d, oprsz, desc);
380bcefc902SRichard Henderson }
381bcefc902SRichard Henderson
HELPER(gvec_abs64)382bcefc902SRichard Henderson void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
383bcefc902SRichard Henderson {
384bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
385bcefc902SRichard Henderson intptr_t i;
386bcefc902SRichard Henderson
387bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) {
388bcefc902SRichard Henderson int64_t aa = *(int64_t *)(a + i);
389bcefc902SRichard Henderson *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
390bcefc902SRichard Henderson }
391bcefc902SRichard Henderson clear_high(d, oprsz, desc);
392bcefc902SRichard Henderson }
393bcefc902SRichard Henderson
HELPER(gvec_mov)394db432672SRichard Henderson void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
395db432672SRichard Henderson {
396db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
397db432672SRichard Henderson
398db432672SRichard Henderson memcpy(d, a, oprsz);
399db432672SRichard Henderson clear_high(d, oprsz, desc);
400db432672SRichard Henderson }
401db432672SRichard Henderson
HELPER(gvec_dup64)402db432672SRichard Henderson void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
403db432672SRichard Henderson {
404db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
405db432672SRichard Henderson intptr_t i;
406db432672SRichard Henderson
407db432672SRichard Henderson if (c == 0) {
408db432672SRichard Henderson oprsz = 0;
409db432672SRichard Henderson } else {
410db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
411db432672SRichard Henderson *(uint64_t *)(d + i) = c;
412db432672SRichard Henderson }
413db432672SRichard Henderson }
414db432672SRichard Henderson clear_high(d, oprsz, desc);
415db432672SRichard Henderson }
416db432672SRichard Henderson
HELPER(gvec_dup32)417db432672SRichard Henderson void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
418db432672SRichard Henderson {
419db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
420db432672SRichard Henderson intptr_t i;
421db432672SRichard Henderson
422db432672SRichard Henderson if (c == 0) {
423db432672SRichard Henderson oprsz = 0;
424db432672SRichard Henderson } else {
425db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
426db432672SRichard Henderson *(uint32_t *)(d + i) = c;
427db432672SRichard Henderson }
428db432672SRichard Henderson }
429db432672SRichard Henderson clear_high(d, oprsz, desc);
430db432672SRichard Henderson }
431db432672SRichard Henderson
HELPER(gvec_dup16)432db432672SRichard Henderson void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
433db432672SRichard Henderson {
434db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
435db432672SRichard Henderson }
436db432672SRichard Henderson
HELPER(gvec_dup8)437db432672SRichard Henderson void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
438db432672SRichard Henderson {
439db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
440db432672SRichard Henderson }
441db432672SRichard Henderson
HELPER(gvec_not)442db432672SRichard Henderson void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
443db432672SRichard Henderson {
444db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
445db432672SRichard Henderson intptr_t i;
446db432672SRichard Henderson
4476c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4486c7ab301SRichard Henderson *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
449db432672SRichard Henderson }
450db432672SRichard Henderson clear_high(d, oprsz, desc);
451db432672SRichard Henderson }
452db432672SRichard Henderson
HELPER(gvec_and)453db432672SRichard Henderson void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
454db432672SRichard Henderson {
455db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
456db432672SRichard Henderson intptr_t i;
457db432672SRichard Henderson
4586c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4596c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
460db432672SRichard Henderson }
461db432672SRichard Henderson clear_high(d, oprsz, desc);
462db432672SRichard Henderson }
463db432672SRichard Henderson
HELPER(gvec_or)464db432672SRichard Henderson void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
465db432672SRichard Henderson {
466db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
467db432672SRichard Henderson intptr_t i;
468db432672SRichard Henderson
4696c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4706c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
471db432672SRichard Henderson }
472db432672SRichard Henderson clear_high(d, oprsz, desc);
473db432672SRichard Henderson }
474db432672SRichard Henderson
HELPER(gvec_xor)475db432672SRichard Henderson void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
476db432672SRichard Henderson {
477db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
478db432672SRichard Henderson intptr_t i;
479db432672SRichard Henderson
4806c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4816c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
482db432672SRichard Henderson }
483db432672SRichard Henderson clear_high(d, oprsz, desc);
484db432672SRichard Henderson }
485db432672SRichard Henderson
HELPER(gvec_andc)486db432672SRichard Henderson void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
487db432672SRichard Henderson {
488db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
489db432672SRichard Henderson intptr_t i;
490db432672SRichard Henderson
4916c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
4926c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
493db432672SRichard Henderson }
494db432672SRichard Henderson clear_high(d, oprsz, desc);
495db432672SRichard Henderson }
496db432672SRichard Henderson
HELPER(gvec_orc)497db432672SRichard Henderson void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
498db432672SRichard Henderson {
499db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
500db432672SRichard Henderson intptr_t i;
501db432672SRichard Henderson
5026c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5036c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
504db432672SRichard Henderson }
505db432672SRichard Henderson clear_high(d, oprsz, desc);
506db432672SRichard Henderson }
507d0ec9796SRichard Henderson
HELPER(gvec_nand)508f550805dSRichard Henderson void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
509f550805dSRichard Henderson {
510f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
511f550805dSRichard Henderson intptr_t i;
512f550805dSRichard Henderson
5136c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5146c7ab301SRichard Henderson *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
515f550805dSRichard Henderson }
516f550805dSRichard Henderson clear_high(d, oprsz, desc);
517f550805dSRichard Henderson }
518f550805dSRichard Henderson
HELPER(gvec_nor)519f550805dSRichard Henderson void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
520f550805dSRichard Henderson {
521f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
522f550805dSRichard Henderson intptr_t i;
523f550805dSRichard Henderson
5246c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5256c7ab301SRichard Henderson *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
526f550805dSRichard Henderson }
527f550805dSRichard Henderson clear_high(d, oprsz, desc);
528f550805dSRichard Henderson }
529f550805dSRichard Henderson
HELPER(gvec_eqv)530f550805dSRichard Henderson void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
531f550805dSRichard Henderson {
532f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
533f550805dSRichard Henderson intptr_t i;
534f550805dSRichard Henderson
5356c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5366c7ab301SRichard Henderson *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
537f550805dSRichard Henderson }
538f550805dSRichard Henderson clear_high(d, oprsz, desc);
539f550805dSRichard Henderson }
540f550805dSRichard Henderson
HELPER(gvec_ands)54122fc3527SRichard Henderson void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
54222fc3527SRichard Henderson {
54322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
54422fc3527SRichard Henderson intptr_t i;
54522fc3527SRichard Henderson
5466c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5470a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b;
54822fc3527SRichard Henderson }
54922fc3527SRichard Henderson clear_high(d, oprsz, desc);
55022fc3527SRichard Henderson }
55122fc3527SRichard Henderson
HELPER(gvec_andcs)5524221aa4aSNazar Kazakov void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc)
5534221aa4aSNazar Kazakov {
5544221aa4aSNazar Kazakov intptr_t oprsz = simd_oprsz(desc);
5554221aa4aSNazar Kazakov intptr_t i;
5564221aa4aSNazar Kazakov
5574221aa4aSNazar Kazakov for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5584221aa4aSNazar Kazakov *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b;
5594221aa4aSNazar Kazakov }
5604221aa4aSNazar Kazakov clear_high(d, oprsz, desc);
5614221aa4aSNazar Kazakov }
5624221aa4aSNazar Kazakov
HELPER(gvec_xors)56322fc3527SRichard Henderson void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
56422fc3527SRichard Henderson {
56522fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
56622fc3527SRichard Henderson intptr_t i;
56722fc3527SRichard Henderson
5686c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5690a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b;
57022fc3527SRichard Henderson }
57122fc3527SRichard Henderson clear_high(d, oprsz, desc);
57222fc3527SRichard Henderson }
57322fc3527SRichard Henderson
HELPER(gvec_ors)57422fc3527SRichard Henderson void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
57522fc3527SRichard Henderson {
57622fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
57722fc3527SRichard Henderson intptr_t i;
57822fc3527SRichard Henderson
5796c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
5800a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b;
58122fc3527SRichard Henderson }
58222fc3527SRichard Henderson clear_high(d, oprsz, desc);
58322fc3527SRichard Henderson }
58422fc3527SRichard Henderson
HELPER(gvec_shl8i)585d0ec9796SRichard Henderson void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
586d0ec9796SRichard Henderson {
587d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
588d0ec9796SRichard Henderson int shift = simd_data(desc);
589d0ec9796SRichard Henderson intptr_t i;
590d0ec9796SRichard Henderson
5916c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
5926c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
593d0ec9796SRichard Henderson }
594d0ec9796SRichard Henderson clear_high(d, oprsz, desc);
595d0ec9796SRichard Henderson }
596d0ec9796SRichard Henderson
HELPER(gvec_shl16i)597d0ec9796SRichard Henderson void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
598d0ec9796SRichard Henderson {
599d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
600d0ec9796SRichard Henderson int shift = simd_data(desc);
601d0ec9796SRichard Henderson intptr_t i;
602d0ec9796SRichard Henderson
6036c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
6046c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
605d0ec9796SRichard Henderson }
606d0ec9796SRichard Henderson clear_high(d, oprsz, desc);
607d0ec9796SRichard Henderson }
608d0ec9796SRichard Henderson
HELPER(gvec_shl32i)609d0ec9796SRichard Henderson void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
610d0ec9796SRichard Henderson {
611d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
612d0ec9796SRichard Henderson int shift = simd_data(desc);
613d0ec9796SRichard Henderson intptr_t i;
614d0ec9796SRichard Henderson
6156c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
6166c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
617d0ec9796SRichard Henderson }
618d0ec9796SRichard Henderson clear_high(d, oprsz, desc);
619d0ec9796SRichard Henderson }
620d0ec9796SRichard Henderson
HELPER(gvec_shl64i)621d0ec9796SRichard Henderson void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
622d0ec9796SRichard Henderson {
623d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
624d0ec9796SRichard Henderson int shift = simd_data(desc);
625d0ec9796SRichard Henderson intptr_t i;
626d0ec9796SRichard Henderson
6276c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
6286c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
629d0ec9796SRichard Henderson }
630d0ec9796SRichard Henderson clear_high(d, oprsz, desc);
631d0ec9796SRichard Henderson }
632d0ec9796SRichard Henderson
HELPER(gvec_shr8i)633d0ec9796SRichard Henderson void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
634d0ec9796SRichard Henderson {
635d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
636d0ec9796SRichard Henderson int shift = simd_data(desc);
637d0ec9796SRichard Henderson intptr_t i;
638d0ec9796SRichard Henderson
6396c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
6406c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
641d0ec9796SRichard Henderson }
642d0ec9796SRichard Henderson clear_high(d, oprsz, desc);
643d0ec9796SRichard Henderson }
644d0ec9796SRichard Henderson
HELPER(gvec_shr16i)645d0ec9796SRichard Henderson void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
646d0ec9796SRichard Henderson {
647d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
648d0ec9796SRichard Henderson int shift = simd_data(desc);
649d0ec9796SRichard Henderson intptr_t i;
650d0ec9796SRichard Henderson
6516c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
6526c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
653d0ec9796SRichard Henderson }
654d0ec9796SRichard Henderson clear_high(d, oprsz, desc);
655d0ec9796SRichard Henderson }
656d0ec9796SRichard Henderson
HELPER(gvec_shr32i)657d0ec9796SRichard Henderson void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
658d0ec9796SRichard Henderson {
659d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
660d0ec9796SRichard Henderson int shift = simd_data(desc);
661d0ec9796SRichard Henderson intptr_t i;
662d0ec9796SRichard Henderson
6636c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
6646c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
665d0ec9796SRichard Henderson }
666d0ec9796SRichard Henderson clear_high(d, oprsz, desc);
667d0ec9796SRichard Henderson }
668d0ec9796SRichard Henderson
HELPER(gvec_shr64i)669d0ec9796SRichard Henderson void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
670d0ec9796SRichard Henderson {
671d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
672d0ec9796SRichard Henderson int shift = simd_data(desc);
673d0ec9796SRichard Henderson intptr_t i;
674d0ec9796SRichard Henderson
6756c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
6766c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
677d0ec9796SRichard Henderson }
678d0ec9796SRichard Henderson clear_high(d, oprsz, desc);
679d0ec9796SRichard Henderson }
680d0ec9796SRichard Henderson
HELPER(gvec_sar8i)681d0ec9796SRichard Henderson void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
682d0ec9796SRichard Henderson {
683d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
684d0ec9796SRichard Henderson int shift = simd_data(desc);
685d0ec9796SRichard Henderson intptr_t i;
686d0ec9796SRichard Henderson
6876c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
6886c7ab301SRichard Henderson *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
689d0ec9796SRichard Henderson }
690d0ec9796SRichard Henderson clear_high(d, oprsz, desc);
691d0ec9796SRichard Henderson }
692d0ec9796SRichard Henderson
HELPER(gvec_sar16i)693d0ec9796SRichard Henderson void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
694d0ec9796SRichard Henderson {
695d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
696d0ec9796SRichard Henderson int shift = simd_data(desc);
697d0ec9796SRichard Henderson intptr_t i;
698d0ec9796SRichard Henderson
6996c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
7006c7ab301SRichard Henderson *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
701d0ec9796SRichard Henderson }
702d0ec9796SRichard Henderson clear_high(d, oprsz, desc);
703d0ec9796SRichard Henderson }
704d0ec9796SRichard Henderson
HELPER(gvec_sar32i)705d0ec9796SRichard Henderson void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
706d0ec9796SRichard Henderson {
707d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
708d0ec9796SRichard Henderson int shift = simd_data(desc);
709d0ec9796SRichard Henderson intptr_t i;
710d0ec9796SRichard Henderson
7116c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
7126c7ab301SRichard Henderson *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
713d0ec9796SRichard Henderson }
714d0ec9796SRichard Henderson clear_high(d, oprsz, desc);
715d0ec9796SRichard Henderson }
716d0ec9796SRichard Henderson
HELPER(gvec_sar64i)717d0ec9796SRichard Henderson void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
718d0ec9796SRichard Henderson {
719d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
720d0ec9796SRichard Henderson int shift = simd_data(desc);
721d0ec9796SRichard Henderson intptr_t i;
722d0ec9796SRichard Henderson
7236c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
7246c7ab301SRichard Henderson *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
725d0ec9796SRichard Henderson }
726d0ec9796SRichard Henderson clear_high(d, oprsz, desc);
727d0ec9796SRichard Henderson }
728212be173SRichard Henderson
HELPER(gvec_rotl8i)729b0f7e744SRichard Henderson void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
730b0f7e744SRichard Henderson {
731b0f7e744SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
732b0f7e744SRichard Henderson int shift = simd_data(desc);
733b0f7e744SRichard Henderson intptr_t i;
734b0f7e744SRichard Henderson
735b0f7e744SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
736b0f7e744SRichard Henderson *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
737b0f7e744SRichard Henderson }
738b0f7e744SRichard Henderson clear_high(d, oprsz, desc);
739b0f7e744SRichard Henderson }
740b0f7e744SRichard Henderson
HELPER(gvec_rotl16i)741b0f7e744SRichard Henderson void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
742b0f7e744SRichard Henderson {
743b0f7e744SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
744b0f7e744SRichard Henderson int shift = simd_data(desc);
745b0f7e744SRichard Henderson intptr_t i;
746b0f7e744SRichard Henderson
747b0f7e744SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
748b0f7e744SRichard Henderson *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
749b0f7e744SRichard Henderson }
750b0f7e744SRichard Henderson clear_high(d, oprsz, desc);
751b0f7e744SRichard Henderson }
752b0f7e744SRichard Henderson
HELPER(gvec_rotl32i)753b0f7e744SRichard Henderson void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
754b0f7e744SRichard Henderson {
755b0f7e744SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
756b0f7e744SRichard Henderson int shift = simd_data(desc);
757b0f7e744SRichard Henderson intptr_t i;
758b0f7e744SRichard Henderson
759b0f7e744SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
760b0f7e744SRichard Henderson *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
761b0f7e744SRichard Henderson }
762b0f7e744SRichard Henderson clear_high(d, oprsz, desc);
763b0f7e744SRichard Henderson }
764b0f7e744SRichard Henderson
HELPER(gvec_rotl64i)765b0f7e744SRichard Henderson void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
766b0f7e744SRichard Henderson {
767b0f7e744SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
768b0f7e744SRichard Henderson int shift = simd_data(desc);
769b0f7e744SRichard Henderson intptr_t i;
770b0f7e744SRichard Henderson
771b0f7e744SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
772b0f7e744SRichard Henderson *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
773b0f7e744SRichard Henderson }
774b0f7e744SRichard Henderson clear_high(d, oprsz, desc);
775b0f7e744SRichard Henderson }
776b0f7e744SRichard Henderson
HELPER(gvec_shl8v)7775ee5c14cSRichard Henderson void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
7785ee5c14cSRichard Henderson {
7795ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
7805ee5c14cSRichard Henderson intptr_t i;
7815ee5c14cSRichard Henderson
7825ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
7835ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7;
7845ee5c14cSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
7855ee5c14cSRichard Henderson }
7865ee5c14cSRichard Henderson clear_high(d, oprsz, desc);
7875ee5c14cSRichard Henderson }
7885ee5c14cSRichard Henderson
HELPER(gvec_shl16v)7895ee5c14cSRichard Henderson void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
7905ee5c14cSRichard Henderson {
7915ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
7925ee5c14cSRichard Henderson intptr_t i;
7935ee5c14cSRichard Henderson
7945ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
7955ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15;
7965ee5c14cSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
7975ee5c14cSRichard Henderson }
7985ee5c14cSRichard Henderson clear_high(d, oprsz, desc);
7995ee5c14cSRichard Henderson }
8005ee5c14cSRichard Henderson
HELPER(gvec_shl32v)8015ee5c14cSRichard Henderson void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
8025ee5c14cSRichard Henderson {
8035ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
8045ee5c14cSRichard Henderson intptr_t i;
8055ee5c14cSRichard Henderson
8065ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
8075ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31;
8085ee5c14cSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
8095ee5c14cSRichard Henderson }
8105ee5c14cSRichard Henderson clear_high(d, oprsz, desc);
8115ee5c14cSRichard Henderson }
8125ee5c14cSRichard Henderson
HELPER(gvec_shl64v)8135ee5c14cSRichard Henderson void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
8145ee5c14cSRichard Henderson {
8155ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
8165ee5c14cSRichard Henderson intptr_t i;
8175ee5c14cSRichard Henderson
8185ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
8195ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63;
8205ee5c14cSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
8215ee5c14cSRichard Henderson }
8225ee5c14cSRichard Henderson clear_high(d, oprsz, desc);
8235ee5c14cSRichard Henderson }
8245ee5c14cSRichard Henderson
HELPER(gvec_shr8v)8255ee5c14cSRichard Henderson void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
8265ee5c14cSRichard Henderson {
8275ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
8285ee5c14cSRichard Henderson intptr_t i;
8295ee5c14cSRichard Henderson
8305ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
8315ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7;
8325ee5c14cSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
8335ee5c14cSRichard Henderson }
8345ee5c14cSRichard Henderson clear_high(d, oprsz, desc);
8355ee5c14cSRichard Henderson }
8365ee5c14cSRichard Henderson
HELPER(gvec_shr16v)8375ee5c14cSRichard Henderson void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
8385ee5c14cSRichard Henderson {
8395ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
8405ee5c14cSRichard Henderson intptr_t i;
8415ee5c14cSRichard Henderson
8425ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
8435ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15;
8445ee5c14cSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
8455ee5c14cSRichard Henderson }
8465ee5c14cSRichard Henderson clear_high(d, oprsz, desc);
8475ee5c14cSRichard Henderson }
8485ee5c14cSRichard Henderson
HELPER(gvec_shr32v)8495ee5c14cSRichard Henderson void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
8505ee5c14cSRichard Henderson {
8515ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
8525ee5c14cSRichard Henderson intptr_t i;
8535ee5c14cSRichard Henderson
8545ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
8555ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31;
8565ee5c14cSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
8575ee5c14cSRichard Henderson }
8585ee5c14cSRichard Henderson clear_high(d, oprsz, desc);
8595ee5c14cSRichard Henderson }
8605ee5c14cSRichard Henderson
HELPER(gvec_shr64v)8615ee5c14cSRichard Henderson void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
8625ee5c14cSRichard Henderson {
8635ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
8645ee5c14cSRichard Henderson intptr_t i;
8655ee5c14cSRichard Henderson
8665ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
8675ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63;
8685ee5c14cSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
8695ee5c14cSRichard Henderson }
8705ee5c14cSRichard Henderson clear_high(d, oprsz, desc);
8715ee5c14cSRichard Henderson }
8725ee5c14cSRichard Henderson
HELPER(gvec_sar8v)8735ee5c14cSRichard Henderson void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
8745ee5c14cSRichard Henderson {
8755ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
8765ee5c14cSRichard Henderson intptr_t i;
8775ee5c14cSRichard Henderson
878899f08adSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) {
8795ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7;
8805ee5c14cSRichard Henderson *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
8815ee5c14cSRichard Henderson }
8825ee5c14cSRichard Henderson clear_high(d, oprsz, desc);
8835ee5c14cSRichard Henderson }
8845ee5c14cSRichard Henderson
HELPER(gvec_sar16v)8855ee5c14cSRichard Henderson void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
8865ee5c14cSRichard Henderson {
8875ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
8885ee5c14cSRichard Henderson intptr_t i;
8895ee5c14cSRichard Henderson
8905ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) {
8915ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15;
8925ee5c14cSRichard Henderson *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
8935ee5c14cSRichard Henderson }
8945ee5c14cSRichard Henderson clear_high(d, oprsz, desc);
8955ee5c14cSRichard Henderson }
8965ee5c14cSRichard Henderson
HELPER(gvec_sar32v)8975ee5c14cSRichard Henderson void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
8985ee5c14cSRichard Henderson {
8995ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
9005ee5c14cSRichard Henderson intptr_t i;
9015ee5c14cSRichard Henderson
902899f08adSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) {
9035ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31;
9045ee5c14cSRichard Henderson *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
9055ee5c14cSRichard Henderson }
9065ee5c14cSRichard Henderson clear_high(d, oprsz, desc);
9075ee5c14cSRichard Henderson }
9085ee5c14cSRichard Henderson
HELPER(gvec_sar64v)9095ee5c14cSRichard Henderson void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
9105ee5c14cSRichard Henderson {
9115ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
9125ee5c14cSRichard Henderson intptr_t i;
9135ee5c14cSRichard Henderson
914899f08adSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) {
9155ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63;
9165ee5c14cSRichard Henderson *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
9175ee5c14cSRichard Henderson }
9185ee5c14cSRichard Henderson clear_high(d, oprsz, desc);
9195ee5c14cSRichard Henderson }
9205ee5c14cSRichard Henderson
HELPER(gvec_rotl8v)9215d0ceda9SRichard Henderson void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
9225d0ceda9SRichard Henderson {
9235d0ceda9SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
9245d0ceda9SRichard Henderson intptr_t i;
9255d0ceda9SRichard Henderson
9265d0ceda9SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
9275d0ceda9SRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7;
9285d0ceda9SRichard Henderson *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
9295d0ceda9SRichard Henderson }
9305d0ceda9SRichard Henderson clear_high(d, oprsz, desc);
9315d0ceda9SRichard Henderson }
9325d0ceda9SRichard Henderson
HELPER(gvec_rotl16v)9335d0ceda9SRichard Henderson void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
9345d0ceda9SRichard Henderson {
9355d0ceda9SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
9365d0ceda9SRichard Henderson intptr_t i;
9375d0ceda9SRichard Henderson
9385d0ceda9SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
9395d0ceda9SRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15;
9405d0ceda9SRichard Henderson *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
9415d0ceda9SRichard Henderson }
9425d0ceda9SRichard Henderson clear_high(d, oprsz, desc);
9435d0ceda9SRichard Henderson }
9445d0ceda9SRichard Henderson
HELPER(gvec_rotl32v)9455d0ceda9SRichard Henderson void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
9465d0ceda9SRichard Henderson {
9475d0ceda9SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
9485d0ceda9SRichard Henderson intptr_t i;
9495d0ceda9SRichard Henderson
9505d0ceda9SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
9515d0ceda9SRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31;
9525d0ceda9SRichard Henderson *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
9535d0ceda9SRichard Henderson }
9545d0ceda9SRichard Henderson clear_high(d, oprsz, desc);
9555d0ceda9SRichard Henderson }
9565d0ceda9SRichard Henderson
HELPER(gvec_rotl64v)9575d0ceda9SRichard Henderson void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
9585d0ceda9SRichard Henderson {
9595d0ceda9SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
9605d0ceda9SRichard Henderson intptr_t i;
9615d0ceda9SRichard Henderson
9625d0ceda9SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
9635d0ceda9SRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63;
9645d0ceda9SRichard Henderson *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
9655d0ceda9SRichard Henderson }
9665d0ceda9SRichard Henderson clear_high(d, oprsz, desc);
9675d0ceda9SRichard Henderson }
9685d0ceda9SRichard Henderson
HELPER(gvec_rotr8v)9695d0ceda9SRichard Henderson void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
9705d0ceda9SRichard Henderson {
9715d0ceda9SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
9725d0ceda9SRichard Henderson intptr_t i;
9735d0ceda9SRichard Henderson
9745d0ceda9SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
9755d0ceda9SRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7;
9765d0ceda9SRichard Henderson *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
9775d0ceda9SRichard Henderson }
9785d0ceda9SRichard Henderson clear_high(d, oprsz, desc);
9795d0ceda9SRichard Henderson }
9805d0ceda9SRichard Henderson
HELPER(gvec_rotr16v)9815d0ceda9SRichard Henderson void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
9825d0ceda9SRichard Henderson {
9835d0ceda9SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
9845d0ceda9SRichard Henderson intptr_t i;
9855d0ceda9SRichard Henderson
9865d0ceda9SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
9875d0ceda9SRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15;
9885d0ceda9SRichard Henderson *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
9895d0ceda9SRichard Henderson }
9905d0ceda9SRichard Henderson clear_high(d, oprsz, desc);
9915d0ceda9SRichard Henderson }
9925d0ceda9SRichard Henderson
HELPER(gvec_rotr32v)9935d0ceda9SRichard Henderson void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
9945d0ceda9SRichard Henderson {
9955d0ceda9SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
9965d0ceda9SRichard Henderson intptr_t i;
9975d0ceda9SRichard Henderson
9985d0ceda9SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
9995d0ceda9SRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31;
10005d0ceda9SRichard Henderson *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
10015d0ceda9SRichard Henderson }
10025d0ceda9SRichard Henderson clear_high(d, oprsz, desc);
10035d0ceda9SRichard Henderson }
10045d0ceda9SRichard Henderson
HELPER(gvec_rotr64v)10055d0ceda9SRichard Henderson void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
10065d0ceda9SRichard Henderson {
10075d0ceda9SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
10085d0ceda9SRichard Henderson intptr_t i;
10095d0ceda9SRichard Henderson
10105d0ceda9SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
10115d0ceda9SRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63;
10125d0ceda9SRichard Henderson *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
10135d0ceda9SRichard Henderson }
10145d0ceda9SRichard Henderson clear_high(d, oprsz, desc);
10155d0ceda9SRichard Henderson }
10165d0ceda9SRichard Henderson
1017212be173SRichard Henderson #define DO_CMP1(NAME, TYPE, OP) \
1018212be173SRichard Henderson void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
1019212be173SRichard Henderson { \
1020212be173SRichard Henderson intptr_t oprsz = simd_oprsz(desc); \
1021212be173SRichard Henderson intptr_t i; \
10226cb1d3b8SRichard Henderson for (i = 0; i < oprsz; i += sizeof(TYPE)) { \
10230270bd50SRichard Henderson *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
1024212be173SRichard Henderson } \
1025212be173SRichard Henderson clear_high(d, oprsz, desc); \
1026212be173SRichard Henderson }
1027212be173SRichard Henderson
1028212be173SRichard Henderson #define DO_CMP2(SZ) \
10296c7ab301SRichard Henderson DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \
10306c7ab301SRichard Henderson DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \
10316c7ab301SRichard Henderson DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \
10326c7ab301SRichard Henderson DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \
10336c7ab301SRichard Henderson DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \
10346c7ab301SRichard Henderson DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
1035212be173SRichard Henderson
1036212be173SRichard Henderson DO_CMP2(8)
1037212be173SRichard Henderson DO_CMP2(16)
1038212be173SRichard Henderson DO_CMP2(32)
1039212be173SRichard Henderson DO_CMP2(64)
1040212be173SRichard Henderson
1041212be173SRichard Henderson #undef DO_CMP1
1042212be173SRichard Henderson #undef DO_CMP2
1043f49b12c6SRichard Henderson
1044*9622c697SRichard Henderson #define DO_CMP1(NAME, TYPE, OP) \
1045*9622c697SRichard Henderson void HELPER(NAME)(void *d, void *a, uint64_t b64, uint32_t desc) \
1046*9622c697SRichard Henderson { \
1047*9622c697SRichard Henderson intptr_t oprsz = simd_oprsz(desc); \
1048*9622c697SRichard Henderson TYPE inv = simd_data(desc), b = b64; \
1049*9622c697SRichard Henderson for (intptr_t i = 0; i < oprsz; i += sizeof(TYPE)) { \
1050*9622c697SRichard Henderson *(TYPE *)(d + i) = -((*(TYPE *)(a + i) OP b) ^ inv); \
1051*9622c697SRichard Henderson } \
1052*9622c697SRichard Henderson clear_high(d, oprsz, desc); \
1053*9622c697SRichard Henderson }
1054*9622c697SRichard Henderson
1055*9622c697SRichard Henderson #define DO_CMP2(SZ) \
1056*9622c697SRichard Henderson DO_CMP1(gvec_eqs##SZ, uint##SZ##_t, ==) \
1057*9622c697SRichard Henderson DO_CMP1(gvec_lts##SZ, int##SZ##_t, <) \
1058*9622c697SRichard Henderson DO_CMP1(gvec_les##SZ, int##SZ##_t, <=) \
1059*9622c697SRichard Henderson DO_CMP1(gvec_ltus##SZ, uint##SZ##_t, <) \
1060*9622c697SRichard Henderson DO_CMP1(gvec_leus##SZ, uint##SZ##_t, <=)
1061*9622c697SRichard Henderson
1062*9622c697SRichard Henderson DO_CMP2(8)
1063*9622c697SRichard Henderson DO_CMP2(16)
1064*9622c697SRichard Henderson DO_CMP2(32)
1065*9622c697SRichard Henderson DO_CMP2(64)
1066*9622c697SRichard Henderson
1067*9622c697SRichard Henderson #undef DO_CMP1
1068*9622c697SRichard Henderson #undef DO_CMP2
1069*9622c697SRichard Henderson
HELPER(gvec_ssadd8)1070f49b12c6SRichard Henderson void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
1071f49b12c6SRichard Henderson {
1072f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1073f49b12c6SRichard Henderson intptr_t i;
1074f49b12c6SRichard Henderson
1075f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1076f49b12c6SRichard Henderson int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
1077f49b12c6SRichard Henderson if (r > INT8_MAX) {
1078f49b12c6SRichard Henderson r = INT8_MAX;
1079f49b12c6SRichard Henderson } else if (r < INT8_MIN) {
1080f49b12c6SRichard Henderson r = INT8_MIN;
1081f49b12c6SRichard Henderson }
1082f49b12c6SRichard Henderson *(int8_t *)(d + i) = r;
1083f49b12c6SRichard Henderson }
1084f49b12c6SRichard Henderson clear_high(d, oprsz, desc);
1085f49b12c6SRichard Henderson }
1086f49b12c6SRichard Henderson
HELPER(gvec_ssadd16)1087f49b12c6SRichard Henderson void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
1088f49b12c6SRichard Henderson {
1089f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1090f49b12c6SRichard Henderson intptr_t i;
1091f49b12c6SRichard Henderson
1092f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1093f49b12c6SRichard Henderson int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
1094f49b12c6SRichard Henderson if (r > INT16_MAX) {
1095f49b12c6SRichard Henderson r = INT16_MAX;
1096f49b12c6SRichard Henderson } else if (r < INT16_MIN) {
1097f49b12c6SRichard Henderson r = INT16_MIN;
1098f49b12c6SRichard Henderson }
1099f49b12c6SRichard Henderson *(int16_t *)(d + i) = r;
1100f49b12c6SRichard Henderson }
1101f49b12c6SRichard Henderson clear_high(d, oprsz, desc);
1102f49b12c6SRichard Henderson }
1103f49b12c6SRichard Henderson
HELPER(gvec_ssadd32)1104f49b12c6SRichard Henderson void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
1105f49b12c6SRichard Henderson {
1106f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1107f49b12c6SRichard Henderson intptr_t i;
1108f49b12c6SRichard Henderson
1109f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1110f49b12c6SRichard Henderson int32_t ai = *(int32_t *)(a + i);
1111f49b12c6SRichard Henderson int32_t bi = *(int32_t *)(b + i);
11127702a855SRichard Henderson int32_t di;
11137702a855SRichard Henderson if (sadd32_overflow(ai, bi, &di)) {
1114f49b12c6SRichard Henderson di = (di < 0 ? INT32_MAX : INT32_MIN);
1115f49b12c6SRichard Henderson }
1116f49b12c6SRichard Henderson *(int32_t *)(d + i) = di;
1117f49b12c6SRichard Henderson }
1118f49b12c6SRichard Henderson clear_high(d, oprsz, desc);
1119f49b12c6SRichard Henderson }
1120f49b12c6SRichard Henderson
HELPER(gvec_ssadd64)1121f49b12c6SRichard Henderson void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
1122f49b12c6SRichard Henderson {
1123f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1124f49b12c6SRichard Henderson intptr_t i;
1125f49b12c6SRichard Henderson
1126f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1127f49b12c6SRichard Henderson int64_t ai = *(int64_t *)(a + i);
1128f49b12c6SRichard Henderson int64_t bi = *(int64_t *)(b + i);
11297702a855SRichard Henderson int64_t di;
11307702a855SRichard Henderson if (sadd64_overflow(ai, bi, &di)) {
1131f49b12c6SRichard Henderson di = (di < 0 ? INT64_MAX : INT64_MIN);
1132f49b12c6SRichard Henderson }
1133f49b12c6SRichard Henderson *(int64_t *)(d + i) = di;
1134f49b12c6SRichard Henderson }
1135f49b12c6SRichard Henderson clear_high(d, oprsz, desc);
1136f49b12c6SRichard Henderson }
1137f49b12c6SRichard Henderson
HELPER(gvec_sssub8)1138f49b12c6SRichard Henderson void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
1139f49b12c6SRichard Henderson {
1140f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1141f49b12c6SRichard Henderson intptr_t i;
1142f49b12c6SRichard Henderson
1143f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1144f49b12c6SRichard Henderson int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
1145f49b12c6SRichard Henderson if (r > INT8_MAX) {
1146f49b12c6SRichard Henderson r = INT8_MAX;
1147f49b12c6SRichard Henderson } else if (r < INT8_MIN) {
1148f49b12c6SRichard Henderson r = INT8_MIN;
1149f49b12c6SRichard Henderson }
1150f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r;
1151f49b12c6SRichard Henderson }
1152f49b12c6SRichard Henderson clear_high(d, oprsz, desc);
1153f49b12c6SRichard Henderson }
1154f49b12c6SRichard Henderson
HELPER(gvec_sssub16)1155f49b12c6SRichard Henderson void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
1156f49b12c6SRichard Henderson {
1157f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1158f49b12c6SRichard Henderson intptr_t i;
1159f49b12c6SRichard Henderson
1160f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1161f49b12c6SRichard Henderson int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
1162f49b12c6SRichard Henderson if (r > INT16_MAX) {
1163f49b12c6SRichard Henderson r = INT16_MAX;
1164f49b12c6SRichard Henderson } else if (r < INT16_MIN) {
1165f49b12c6SRichard Henderson r = INT16_MIN;
1166f49b12c6SRichard Henderson }
1167f49b12c6SRichard Henderson *(int16_t *)(d + i) = r;
1168f49b12c6SRichard Henderson }
1169f49b12c6SRichard Henderson clear_high(d, oprsz, desc);
1170f49b12c6SRichard Henderson }
1171f49b12c6SRichard Henderson
HELPER(gvec_sssub32)1172f49b12c6SRichard Henderson void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
1173f49b12c6SRichard Henderson {
1174f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1175f49b12c6SRichard Henderson intptr_t i;
1176f49b12c6SRichard Henderson
1177f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1178f49b12c6SRichard Henderson int32_t ai = *(int32_t *)(a + i);
1179f49b12c6SRichard Henderson int32_t bi = *(int32_t *)(b + i);
11807702a855SRichard Henderson int32_t di;
11817702a855SRichard Henderson if (ssub32_overflow(ai, bi, &di)) {
1182f49b12c6SRichard Henderson di = (di < 0 ? INT32_MAX : INT32_MIN);
1183f49b12c6SRichard Henderson }
1184f49b12c6SRichard Henderson *(int32_t *)(d + i) = di;
1185f49b12c6SRichard Henderson }
1186f49b12c6SRichard Henderson clear_high(d, oprsz, desc);
1187f49b12c6SRichard Henderson }
1188f49b12c6SRichard Henderson
HELPER(gvec_sssub64)1189f49b12c6SRichard Henderson void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
1190f49b12c6SRichard Henderson {
1191f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1192f49b12c6SRichard Henderson intptr_t i;
1193f49b12c6SRichard Henderson
1194f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1195f49b12c6SRichard Henderson int64_t ai = *(int64_t *)(a + i);
1196f49b12c6SRichard Henderson int64_t bi = *(int64_t *)(b + i);
11977702a855SRichard Henderson int64_t di;
11987702a855SRichard Henderson if (ssub64_overflow(ai, bi, &di)) {
1199f49b12c6SRichard Henderson di = (di < 0 ? INT64_MAX : INT64_MIN);
1200f49b12c6SRichard Henderson }
1201f49b12c6SRichard Henderson *(int64_t *)(d + i) = di;
1202f49b12c6SRichard Henderson }
1203f49b12c6SRichard Henderson clear_high(d, oprsz, desc);
1204f49b12c6SRichard Henderson }
1205f49b12c6SRichard Henderson
HELPER(gvec_usadd8)1206f49b12c6SRichard Henderson void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
1207f49b12c6SRichard Henderson {
1208f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1209f49b12c6SRichard Henderson intptr_t i;
1210f49b12c6SRichard Henderson
1211f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1212f49b12c6SRichard Henderson unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
1213f49b12c6SRichard Henderson if (r > UINT8_MAX) {
1214f49b12c6SRichard Henderson r = UINT8_MAX;
1215f49b12c6SRichard Henderson }
1216f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r;
1217f49b12c6SRichard Henderson }
1218f49b12c6SRichard Henderson clear_high(d, oprsz, desc);
1219f49b12c6SRichard Henderson }
1220f49b12c6SRichard Henderson
HELPER(gvec_usadd16)1221f49b12c6SRichard Henderson void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
1222f49b12c6SRichard Henderson {
1223f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1224f49b12c6SRichard Henderson intptr_t i;
1225f49b12c6SRichard Henderson
1226f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1227f49b12c6SRichard Henderson unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
1228f49b12c6SRichard Henderson if (r > UINT16_MAX) {
1229f49b12c6SRichard Henderson r = UINT16_MAX;
1230f49b12c6SRichard Henderson }
1231f49b12c6SRichard Henderson *(uint16_t *)(d + i) = r;
1232f49b12c6SRichard Henderson }
1233f49b12c6SRichard Henderson clear_high(d, oprsz, desc);
1234f49b12c6SRichard Henderson }
1235f49b12c6SRichard Henderson
HELPER(gvec_usadd32)1236f49b12c6SRichard Henderson void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
1237f49b12c6SRichard Henderson {
1238f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1239f49b12c6SRichard Henderson intptr_t i;
1240f49b12c6SRichard Henderson
1241f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1242f49b12c6SRichard Henderson uint32_t ai = *(uint32_t *)(a + i);
1243f49b12c6SRichard Henderson uint32_t bi = *(uint32_t *)(b + i);
12447702a855SRichard Henderson uint32_t di;
12457702a855SRichard Henderson if (uadd32_overflow(ai, bi, &di)) {
1246f49b12c6SRichard Henderson di = UINT32_MAX;
1247f49b12c6SRichard Henderson }
1248f49b12c6SRichard Henderson *(uint32_t *)(d + i) = di;
1249f49b12c6SRichard Henderson }
1250f49b12c6SRichard Henderson clear_high(d, oprsz, desc);
1251f49b12c6SRichard Henderson }
1252f49b12c6SRichard Henderson
HELPER(gvec_usadd64)1253f49b12c6SRichard Henderson void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
1254f49b12c6SRichard Henderson {
1255f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1256f49b12c6SRichard Henderson intptr_t i;
1257f49b12c6SRichard Henderson
1258f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1259f49b12c6SRichard Henderson uint64_t ai = *(uint64_t *)(a + i);
1260f49b12c6SRichard Henderson uint64_t bi = *(uint64_t *)(b + i);
12617702a855SRichard Henderson uint64_t di;
12627702a855SRichard Henderson if (uadd64_overflow(ai, bi, &di)) {
1263f49b12c6SRichard Henderson di = UINT64_MAX;
1264f49b12c6SRichard Henderson }
1265f49b12c6SRichard Henderson *(uint64_t *)(d + i) = di;
1266f49b12c6SRichard Henderson }
1267f49b12c6SRichard Henderson clear_high(d, oprsz, desc);
1268f49b12c6SRichard Henderson }
1269f49b12c6SRichard Henderson
HELPER(gvec_ussub8)1270f49b12c6SRichard Henderson void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
1271f49b12c6SRichard Henderson {
1272f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1273f49b12c6SRichard Henderson intptr_t i;
1274f49b12c6SRichard Henderson
1275f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1276f49b12c6SRichard Henderson int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
1277f49b12c6SRichard Henderson if (r < 0) {
1278f49b12c6SRichard Henderson r = 0;
1279f49b12c6SRichard Henderson }
1280f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r;
1281f49b12c6SRichard Henderson }
1282f49b12c6SRichard Henderson clear_high(d, oprsz, desc);
1283f49b12c6SRichard Henderson }
1284f49b12c6SRichard Henderson
HELPER(gvec_ussub16)1285f49b12c6SRichard Henderson void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
1286f49b12c6SRichard Henderson {
1287f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1288f49b12c6SRichard Henderson intptr_t i;
1289f49b12c6SRichard Henderson
1290f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1291f49b12c6SRichard Henderson int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
1292f49b12c6SRichard Henderson if (r < 0) {
1293f49b12c6SRichard Henderson r = 0;
1294f49b12c6SRichard Henderson }
1295f49b12c6SRichard Henderson *(uint16_t *)(d + i) = r;
1296f49b12c6SRichard Henderson }
1297f49b12c6SRichard Henderson clear_high(d, oprsz, desc);
1298f49b12c6SRichard Henderson }
1299f49b12c6SRichard Henderson
HELPER(gvec_ussub32)1300f49b12c6SRichard Henderson void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
1301f49b12c6SRichard Henderson {
1302f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1303f49b12c6SRichard Henderson intptr_t i;
1304f49b12c6SRichard Henderson
1305f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1306f49b12c6SRichard Henderson uint32_t ai = *(uint32_t *)(a + i);
1307f49b12c6SRichard Henderson uint32_t bi = *(uint32_t *)(b + i);
13087702a855SRichard Henderson uint32_t di;
13097702a855SRichard Henderson if (usub32_overflow(ai, bi, &di)) {
1310f49b12c6SRichard Henderson di = 0;
1311f49b12c6SRichard Henderson }
1312f49b12c6SRichard Henderson *(uint32_t *)(d + i) = di;
1313f49b12c6SRichard Henderson }
1314f49b12c6SRichard Henderson clear_high(d, oprsz, desc);
1315f49b12c6SRichard Henderson }
1316f49b12c6SRichard Henderson
HELPER(gvec_ussub64)1317f49b12c6SRichard Henderson void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1318f49b12c6SRichard Henderson {
1319f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1320f49b12c6SRichard Henderson intptr_t i;
1321f49b12c6SRichard Henderson
1322f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1323f49b12c6SRichard Henderson uint64_t ai = *(uint64_t *)(a + i);
1324f49b12c6SRichard Henderson uint64_t bi = *(uint64_t *)(b + i);
13257702a855SRichard Henderson uint64_t di;
13267702a855SRichard Henderson if (usub64_overflow(ai, bi, &di)) {
1327f49b12c6SRichard Henderson di = 0;
1328f49b12c6SRichard Henderson }
1329f49b12c6SRichard Henderson *(uint64_t *)(d + i) = di;
1330f49b12c6SRichard Henderson }
1331f49b12c6SRichard Henderson clear_high(d, oprsz, desc);
1332f49b12c6SRichard Henderson }
1333dd0a0fcdSRichard Henderson
HELPER(gvec_smin8)1334dd0a0fcdSRichard Henderson void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1335dd0a0fcdSRichard Henderson {
1336dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1337dd0a0fcdSRichard Henderson intptr_t i;
1338dd0a0fcdSRichard Henderson
1339dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1340dd0a0fcdSRichard Henderson int8_t aa = *(int8_t *)(a + i);
1341dd0a0fcdSRichard Henderson int8_t bb = *(int8_t *)(b + i);
1342dd0a0fcdSRichard Henderson int8_t dd = aa < bb ? aa : bb;
1343dd0a0fcdSRichard Henderson *(int8_t *)(d + i) = dd;
1344dd0a0fcdSRichard Henderson }
1345dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc);
1346dd0a0fcdSRichard Henderson }
1347dd0a0fcdSRichard Henderson
HELPER(gvec_smin16)1348dd0a0fcdSRichard Henderson void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1349dd0a0fcdSRichard Henderson {
1350dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1351dd0a0fcdSRichard Henderson intptr_t i;
1352dd0a0fcdSRichard Henderson
1353dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1354dd0a0fcdSRichard Henderson int16_t aa = *(int16_t *)(a + i);
1355dd0a0fcdSRichard Henderson int16_t bb = *(int16_t *)(b + i);
1356dd0a0fcdSRichard Henderson int16_t dd = aa < bb ? aa : bb;
1357dd0a0fcdSRichard Henderson *(int16_t *)(d + i) = dd;
1358dd0a0fcdSRichard Henderson }
1359dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc);
1360dd0a0fcdSRichard Henderson }
1361dd0a0fcdSRichard Henderson
HELPER(gvec_smin32)1362dd0a0fcdSRichard Henderson void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1363dd0a0fcdSRichard Henderson {
1364dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1365dd0a0fcdSRichard Henderson intptr_t i;
1366dd0a0fcdSRichard Henderson
1367dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1368dd0a0fcdSRichard Henderson int32_t aa = *(int32_t *)(a + i);
1369dd0a0fcdSRichard Henderson int32_t bb = *(int32_t *)(b + i);
1370dd0a0fcdSRichard Henderson int32_t dd = aa < bb ? aa : bb;
1371dd0a0fcdSRichard Henderson *(int32_t *)(d + i) = dd;
1372dd0a0fcdSRichard Henderson }
1373dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc);
1374dd0a0fcdSRichard Henderson }
1375dd0a0fcdSRichard Henderson
HELPER(gvec_smin64)1376dd0a0fcdSRichard Henderson void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1377dd0a0fcdSRichard Henderson {
1378dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1379dd0a0fcdSRichard Henderson intptr_t i;
1380dd0a0fcdSRichard Henderson
1381dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1382dd0a0fcdSRichard Henderson int64_t aa = *(int64_t *)(a + i);
1383dd0a0fcdSRichard Henderson int64_t bb = *(int64_t *)(b + i);
1384dd0a0fcdSRichard Henderson int64_t dd = aa < bb ? aa : bb;
1385dd0a0fcdSRichard Henderson *(int64_t *)(d + i) = dd;
1386dd0a0fcdSRichard Henderson }
1387dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc);
1388dd0a0fcdSRichard Henderson }
1389dd0a0fcdSRichard Henderson
HELPER(gvec_smax8)1390dd0a0fcdSRichard Henderson void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1391dd0a0fcdSRichard Henderson {
1392dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1393dd0a0fcdSRichard Henderson intptr_t i;
1394dd0a0fcdSRichard Henderson
1395dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1396dd0a0fcdSRichard Henderson int8_t aa = *(int8_t *)(a + i);
1397dd0a0fcdSRichard Henderson int8_t bb = *(int8_t *)(b + i);
1398dd0a0fcdSRichard Henderson int8_t dd = aa > bb ? aa : bb;
1399dd0a0fcdSRichard Henderson *(int8_t *)(d + i) = dd;
1400dd0a0fcdSRichard Henderson }
1401dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc);
1402dd0a0fcdSRichard Henderson }
1403dd0a0fcdSRichard Henderson
HELPER(gvec_smax16)1404dd0a0fcdSRichard Henderson void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1405dd0a0fcdSRichard Henderson {
1406dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1407dd0a0fcdSRichard Henderson intptr_t i;
1408dd0a0fcdSRichard Henderson
1409dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1410dd0a0fcdSRichard Henderson int16_t aa = *(int16_t *)(a + i);
1411dd0a0fcdSRichard Henderson int16_t bb = *(int16_t *)(b + i);
1412dd0a0fcdSRichard Henderson int16_t dd = aa > bb ? aa : bb;
1413dd0a0fcdSRichard Henderson *(int16_t *)(d + i) = dd;
1414dd0a0fcdSRichard Henderson }
1415dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc);
1416dd0a0fcdSRichard Henderson }
1417dd0a0fcdSRichard Henderson
HELPER(gvec_smax32)1418dd0a0fcdSRichard Henderson void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1419dd0a0fcdSRichard Henderson {
1420dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1421dd0a0fcdSRichard Henderson intptr_t i;
1422dd0a0fcdSRichard Henderson
1423dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1424dd0a0fcdSRichard Henderson int32_t aa = *(int32_t *)(a + i);
1425dd0a0fcdSRichard Henderson int32_t bb = *(int32_t *)(b + i);
1426dd0a0fcdSRichard Henderson int32_t dd = aa > bb ? aa : bb;
1427dd0a0fcdSRichard Henderson *(int32_t *)(d + i) = dd;
1428dd0a0fcdSRichard Henderson }
1429dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc);
1430dd0a0fcdSRichard Henderson }
1431dd0a0fcdSRichard Henderson
HELPER(gvec_smax64)1432dd0a0fcdSRichard Henderson void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1433dd0a0fcdSRichard Henderson {
1434dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1435dd0a0fcdSRichard Henderson intptr_t i;
1436dd0a0fcdSRichard Henderson
1437dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1438dd0a0fcdSRichard Henderson int64_t aa = *(int64_t *)(a + i);
1439dd0a0fcdSRichard Henderson int64_t bb = *(int64_t *)(b + i);
1440dd0a0fcdSRichard Henderson int64_t dd = aa > bb ? aa : bb;
1441dd0a0fcdSRichard Henderson *(int64_t *)(d + i) = dd;
1442dd0a0fcdSRichard Henderson }
1443dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc);
1444dd0a0fcdSRichard Henderson }
1445dd0a0fcdSRichard Henderson
HELPER(gvec_umin8)1446dd0a0fcdSRichard Henderson void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1447dd0a0fcdSRichard Henderson {
1448dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1449dd0a0fcdSRichard Henderson intptr_t i;
1450dd0a0fcdSRichard Henderson
1451dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1452dd0a0fcdSRichard Henderson uint8_t aa = *(uint8_t *)(a + i);
1453dd0a0fcdSRichard Henderson uint8_t bb = *(uint8_t *)(b + i);
1454dd0a0fcdSRichard Henderson uint8_t dd = aa < bb ? aa : bb;
1455dd0a0fcdSRichard Henderson *(uint8_t *)(d + i) = dd;
1456dd0a0fcdSRichard Henderson }
1457dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc);
1458dd0a0fcdSRichard Henderson }
1459dd0a0fcdSRichard Henderson
HELPER(gvec_umin16)1460dd0a0fcdSRichard Henderson void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1461dd0a0fcdSRichard Henderson {
1462dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1463dd0a0fcdSRichard Henderson intptr_t i;
1464dd0a0fcdSRichard Henderson
1465dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1466dd0a0fcdSRichard Henderson uint16_t aa = *(uint16_t *)(a + i);
1467dd0a0fcdSRichard Henderson uint16_t bb = *(uint16_t *)(b + i);
1468dd0a0fcdSRichard Henderson uint16_t dd = aa < bb ? aa : bb;
1469dd0a0fcdSRichard Henderson *(uint16_t *)(d + i) = dd;
1470dd0a0fcdSRichard Henderson }
1471dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc);
1472dd0a0fcdSRichard Henderson }
1473dd0a0fcdSRichard Henderson
HELPER(gvec_umin32)1474dd0a0fcdSRichard Henderson void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1475dd0a0fcdSRichard Henderson {
1476dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1477dd0a0fcdSRichard Henderson intptr_t i;
1478dd0a0fcdSRichard Henderson
1479dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1480dd0a0fcdSRichard Henderson uint32_t aa = *(uint32_t *)(a + i);
1481dd0a0fcdSRichard Henderson uint32_t bb = *(uint32_t *)(b + i);
1482dd0a0fcdSRichard Henderson uint32_t dd = aa < bb ? aa : bb;
1483dd0a0fcdSRichard Henderson *(uint32_t *)(d + i) = dd;
1484dd0a0fcdSRichard Henderson }
1485dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc);
1486dd0a0fcdSRichard Henderson }
1487dd0a0fcdSRichard Henderson
HELPER(gvec_umin64)1488dd0a0fcdSRichard Henderson void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1489dd0a0fcdSRichard Henderson {
1490dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1491dd0a0fcdSRichard Henderson intptr_t i;
1492dd0a0fcdSRichard Henderson
1493dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1494dd0a0fcdSRichard Henderson uint64_t aa = *(uint64_t *)(a + i);
1495dd0a0fcdSRichard Henderson uint64_t bb = *(uint64_t *)(b + i);
1496dd0a0fcdSRichard Henderson uint64_t dd = aa < bb ? aa : bb;
1497dd0a0fcdSRichard Henderson *(uint64_t *)(d + i) = dd;
1498dd0a0fcdSRichard Henderson }
1499dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc);
1500dd0a0fcdSRichard Henderson }
1501dd0a0fcdSRichard Henderson
HELPER(gvec_umax8)1502dd0a0fcdSRichard Henderson void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1503dd0a0fcdSRichard Henderson {
1504dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1505dd0a0fcdSRichard Henderson intptr_t i;
1506dd0a0fcdSRichard Henderson
1507dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1508dd0a0fcdSRichard Henderson uint8_t aa = *(uint8_t *)(a + i);
1509dd0a0fcdSRichard Henderson uint8_t bb = *(uint8_t *)(b + i);
1510dd0a0fcdSRichard Henderson uint8_t dd = aa > bb ? aa : bb;
1511dd0a0fcdSRichard Henderson *(uint8_t *)(d + i) = dd;
1512dd0a0fcdSRichard Henderson }
1513dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc);
1514dd0a0fcdSRichard Henderson }
1515dd0a0fcdSRichard Henderson
HELPER(gvec_umax16)1516dd0a0fcdSRichard Henderson void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1517dd0a0fcdSRichard Henderson {
1518dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1519dd0a0fcdSRichard Henderson intptr_t i;
1520dd0a0fcdSRichard Henderson
1521dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1522dd0a0fcdSRichard Henderson uint16_t aa = *(uint16_t *)(a + i);
1523dd0a0fcdSRichard Henderson uint16_t bb = *(uint16_t *)(b + i);
1524dd0a0fcdSRichard Henderson uint16_t dd = aa > bb ? aa : bb;
1525dd0a0fcdSRichard Henderson *(uint16_t *)(d + i) = dd;
1526dd0a0fcdSRichard Henderson }
1527dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc);
1528dd0a0fcdSRichard Henderson }
1529dd0a0fcdSRichard Henderson
HELPER(gvec_umax32)1530dd0a0fcdSRichard Henderson void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1531dd0a0fcdSRichard Henderson {
1532dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1533dd0a0fcdSRichard Henderson intptr_t i;
1534dd0a0fcdSRichard Henderson
1535dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1536dd0a0fcdSRichard Henderson uint32_t aa = *(uint32_t *)(a + i);
1537dd0a0fcdSRichard Henderson uint32_t bb = *(uint32_t *)(b + i);
1538dd0a0fcdSRichard Henderson uint32_t dd = aa > bb ? aa : bb;
1539dd0a0fcdSRichard Henderson *(uint32_t *)(d + i) = dd;
1540dd0a0fcdSRichard Henderson }
1541dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc);
1542dd0a0fcdSRichard Henderson }
1543dd0a0fcdSRichard Henderson
HELPER(gvec_umax64)1544dd0a0fcdSRichard Henderson void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1545dd0a0fcdSRichard Henderson {
1546dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc);
1547dd0a0fcdSRichard Henderson intptr_t i;
1548dd0a0fcdSRichard Henderson
1549dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1550dd0a0fcdSRichard Henderson uint64_t aa = *(uint64_t *)(a + i);
1551dd0a0fcdSRichard Henderson uint64_t bb = *(uint64_t *)(b + i);
1552dd0a0fcdSRichard Henderson uint64_t dd = aa > bb ? aa : bb;
1553dd0a0fcdSRichard Henderson *(uint64_t *)(d + i) = dd;
1554dd0a0fcdSRichard Henderson }
1555dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc);
1556dd0a0fcdSRichard Henderson }
155738dc1294SRichard Henderson
HELPER(gvec_bitsel)155838dc1294SRichard Henderson void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
155938dc1294SRichard Henderson {
156038dc1294SRichard Henderson intptr_t oprsz = simd_oprsz(desc);
156138dc1294SRichard Henderson intptr_t i;
156238dc1294SRichard Henderson
15636c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
15646c7ab301SRichard Henderson uint64_t aa = *(uint64_t *)(a + i);
15656c7ab301SRichard Henderson uint64_t bb = *(uint64_t *)(b + i);
15666c7ab301SRichard Henderson uint64_t cc = *(uint64_t *)(c + i);
15676c7ab301SRichard Henderson *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
156838dc1294SRichard Henderson }
156938dc1294SRichard Henderson clear_high(d, oprsz, desc);
157038dc1294SRichard Henderson }
1571