xref: /qemu/accel/tcg/tcg-runtime-gvec.c (revision 6c7ab3015ac498181444deff55dcc8fd43ad468c) !
1db432672SRichard Henderson /*
2db432672SRichard Henderson  * Generic vectorized operation runtime
3db432672SRichard Henderson  *
4db432672SRichard Henderson  * Copyright (c) 2018 Linaro
5db432672SRichard Henderson  *
6db432672SRichard Henderson  * This library is free software; you can redistribute it and/or
7db432672SRichard Henderson  * modify it under the terms of the GNU Lesser General Public
8db432672SRichard Henderson  * License as published by the Free Software Foundation; either
9fb0343d5SThomas Huth  * version 2.1 of the License, or (at your option) any later version.
10db432672SRichard Henderson  *
11db432672SRichard Henderson  * This library is distributed in the hope that it will be useful,
12db432672SRichard Henderson  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13db432672SRichard Henderson  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14db432672SRichard Henderson  * Lesser General Public License for more details.
15db432672SRichard Henderson  *
16db432672SRichard Henderson  * You should have received a copy of the GNU Lesser General Public
17db432672SRichard Henderson  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18db432672SRichard Henderson  */
19db432672SRichard Henderson 
20db432672SRichard Henderson #include "qemu/osdep.h"
21db432672SRichard Henderson #include "qemu/host-utils.h"
22db432672SRichard Henderson #include "cpu.h"
23db432672SRichard Henderson #include "exec/helper-proto.h"
24dcb32f1dSPhilippe Mathieu-Daudé #include "tcg/tcg-gvec-desc.h"
25db432672SRichard Henderson 
26db432672SRichard Henderson 
/*
 * Scalar "duplicate" helpers.  Every variant expands to its argument
 * unchanged; the callers in this file narrow the result with a cast to
 * the element type.  The expansion is parenthesized so that such a cast
 * applies to the whole argument expression and not only to its first
 * operand.
 */
#define DUP16(X)  (X)
#define DUP8(X)   (X)
#define DUP4(X)   (X)
#define DUP2(X)   (X)
31db432672SRichard Henderson 
/*
 * Zero the part of the destination that lies beyond the operation size.
 *
 * @d:     start of the destination vector
 * @oprsz: byte offset at which zeroing begins
 * @desc:  descriptor from which simd_maxsz() extracts the upper bound
 *
 * Nothing is written when simd_maxsz(desc) does not exceed @oprsz.
 * Otherwise the range is cleared with 64-bit stores, one every
 * sizeof(uint64_t) bytes.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    const intptr_t limit = simd_maxsz(desc);
    intptr_t off;

    if (!unlikely(limit > oprsz)) {
        return;
    }
    for (off = oprsz; off < limit; off += sizeof(uint64_t)) {
        *(uint64_t *)(d + off) = 0;
    }
}
43db432672SRichard Henderson 
44db432672SRichard Henderson void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
45db432672SRichard Henderson {
46db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
47db432672SRichard Henderson     intptr_t i;
48db432672SRichard Henderson 
49*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
50*6c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
51db432672SRichard Henderson     }
52db432672SRichard Henderson     clear_high(d, oprsz, desc);
53db432672SRichard Henderson }
54db432672SRichard Henderson 
55db432672SRichard Henderson void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
56db432672SRichard Henderson {
57db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
58db432672SRichard Henderson     intptr_t i;
59db432672SRichard Henderson 
60*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
61*6c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
62db432672SRichard Henderson     }
63db432672SRichard Henderson     clear_high(d, oprsz, desc);
64db432672SRichard Henderson }
65db432672SRichard Henderson 
66db432672SRichard Henderson void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
67db432672SRichard Henderson {
68db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
69db432672SRichard Henderson     intptr_t i;
70db432672SRichard Henderson 
71*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
72*6c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
73db432672SRichard Henderson     }
74db432672SRichard Henderson     clear_high(d, oprsz, desc);
75db432672SRichard Henderson }
76db432672SRichard Henderson 
77db432672SRichard Henderson void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
78db432672SRichard Henderson {
79db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
80db432672SRichard Henderson     intptr_t i;
81db432672SRichard Henderson 
82*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
83*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
84db432672SRichard Henderson     }
85db432672SRichard Henderson     clear_high(d, oprsz, desc);
86db432672SRichard Henderson }
87db432672SRichard Henderson 
8822fc3527SRichard Henderson void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
8922fc3527SRichard Henderson {
9022fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
91*6c7ab301SRichard Henderson     uint8_t vecb = (uint8_t)DUP16(b);
9222fc3527SRichard Henderson     intptr_t i;
9322fc3527SRichard Henderson 
94*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
95*6c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + vecb;
9622fc3527SRichard Henderson     }
9722fc3527SRichard Henderson     clear_high(d, oprsz, desc);
9822fc3527SRichard Henderson }
9922fc3527SRichard Henderson 
10022fc3527SRichard Henderson void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
10122fc3527SRichard Henderson {
10222fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
103*6c7ab301SRichard Henderson     uint16_t vecb = (uint16_t)DUP8(b);
10422fc3527SRichard Henderson     intptr_t i;
10522fc3527SRichard Henderson 
106*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
107*6c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + vecb;
10822fc3527SRichard Henderson     }
10922fc3527SRichard Henderson     clear_high(d, oprsz, desc);
11022fc3527SRichard Henderson }
11122fc3527SRichard Henderson 
11222fc3527SRichard Henderson void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
11322fc3527SRichard Henderson {
11422fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
115*6c7ab301SRichard Henderson     uint32_t vecb = (uint32_t)DUP4(b);
11622fc3527SRichard Henderson     intptr_t i;
11722fc3527SRichard Henderson 
118*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
119*6c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + vecb;
12022fc3527SRichard Henderson     }
12122fc3527SRichard Henderson     clear_high(d, oprsz, desc);
12222fc3527SRichard Henderson }
12322fc3527SRichard Henderson 
12422fc3527SRichard Henderson void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
12522fc3527SRichard Henderson {
12622fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
127*6c7ab301SRichard Henderson     uint64_t vecb = (uint64_t)DUP2(b);
12822fc3527SRichard Henderson     intptr_t i;
12922fc3527SRichard Henderson 
130*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
131*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + vecb;
13222fc3527SRichard Henderson     }
13322fc3527SRichard Henderson     clear_high(d, oprsz, desc);
13422fc3527SRichard Henderson }
13522fc3527SRichard Henderson 
136db432672SRichard Henderson void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
137db432672SRichard Henderson {
138db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
139db432672SRichard Henderson     intptr_t i;
140db432672SRichard Henderson 
141*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
142*6c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
143db432672SRichard Henderson     }
144db432672SRichard Henderson     clear_high(d, oprsz, desc);
145db432672SRichard Henderson }
146db432672SRichard Henderson 
147db432672SRichard Henderson void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
148db432672SRichard Henderson {
149db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
150db432672SRichard Henderson     intptr_t i;
151db432672SRichard Henderson 
152*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
153*6c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
154db432672SRichard Henderson     }
155db432672SRichard Henderson     clear_high(d, oprsz, desc);
156db432672SRichard Henderson }
157db432672SRichard Henderson 
158db432672SRichard Henderson void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
159db432672SRichard Henderson {
160db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
161db432672SRichard Henderson     intptr_t i;
162db432672SRichard Henderson 
163*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
164*6c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
165db432672SRichard Henderson     }
166db432672SRichard Henderson     clear_high(d, oprsz, desc);
167db432672SRichard Henderson }
168db432672SRichard Henderson 
169db432672SRichard Henderson void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
170db432672SRichard Henderson {
171db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
172db432672SRichard Henderson     intptr_t i;
173db432672SRichard Henderson 
174*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
175*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
176db432672SRichard Henderson     }
177db432672SRichard Henderson     clear_high(d, oprsz, desc);
178db432672SRichard Henderson }
179db432672SRichard Henderson 
18022fc3527SRichard Henderson void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
18122fc3527SRichard Henderson {
18222fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
183*6c7ab301SRichard Henderson     uint8_t vecb = (uint8_t)DUP16(b);
18422fc3527SRichard Henderson     intptr_t i;
18522fc3527SRichard Henderson 
186*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
187*6c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - vecb;
18822fc3527SRichard Henderson     }
18922fc3527SRichard Henderson     clear_high(d, oprsz, desc);
19022fc3527SRichard Henderson }
19122fc3527SRichard Henderson 
19222fc3527SRichard Henderson void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
19322fc3527SRichard Henderson {
19422fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
195*6c7ab301SRichard Henderson     uint16_t vecb = (uint16_t)DUP8(b);
19622fc3527SRichard Henderson     intptr_t i;
19722fc3527SRichard Henderson 
198*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
199*6c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - vecb;
20022fc3527SRichard Henderson     }
20122fc3527SRichard Henderson     clear_high(d, oprsz, desc);
20222fc3527SRichard Henderson }
20322fc3527SRichard Henderson 
20422fc3527SRichard Henderson void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
20522fc3527SRichard Henderson {
20622fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
207*6c7ab301SRichard Henderson     uint32_t vecb = (uint32_t)DUP4(b);
20822fc3527SRichard Henderson     intptr_t i;
20922fc3527SRichard Henderson 
210*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
211*6c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - vecb;
21222fc3527SRichard Henderson     }
21322fc3527SRichard Henderson     clear_high(d, oprsz, desc);
21422fc3527SRichard Henderson }
21522fc3527SRichard Henderson 
21622fc3527SRichard Henderson void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
21722fc3527SRichard Henderson {
21822fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
219*6c7ab301SRichard Henderson     uint64_t vecb = (uint64_t)DUP2(b);
22022fc3527SRichard Henderson     intptr_t i;
22122fc3527SRichard Henderson 
222*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
223*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - vecb;
22422fc3527SRichard Henderson     }
22522fc3527SRichard Henderson     clear_high(d, oprsz, desc);
22622fc3527SRichard Henderson }
22722fc3527SRichard Henderson 
2283774030aSRichard Henderson void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
2293774030aSRichard Henderson {
2303774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2313774030aSRichard Henderson     intptr_t i;
2323774030aSRichard Henderson 
233*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
234*6c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
2353774030aSRichard Henderson     }
2363774030aSRichard Henderson     clear_high(d, oprsz, desc);
2373774030aSRichard Henderson }
2383774030aSRichard Henderson 
2393774030aSRichard Henderson void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
2403774030aSRichard Henderson {
2413774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2423774030aSRichard Henderson     intptr_t i;
2433774030aSRichard Henderson 
244*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
245*6c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
2463774030aSRichard Henderson     }
2473774030aSRichard Henderson     clear_high(d, oprsz, desc);
2483774030aSRichard Henderson }
2493774030aSRichard Henderson 
2503774030aSRichard Henderson void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
2513774030aSRichard Henderson {
2523774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2533774030aSRichard Henderson     intptr_t i;
2543774030aSRichard Henderson 
255*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
256*6c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
2573774030aSRichard Henderson     }
2583774030aSRichard Henderson     clear_high(d, oprsz, desc);
2593774030aSRichard Henderson }
2603774030aSRichard Henderson 
2613774030aSRichard Henderson void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
2623774030aSRichard Henderson {
2633774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2643774030aSRichard Henderson     intptr_t i;
2653774030aSRichard Henderson 
266*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
267*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
2683774030aSRichard Henderson     }
2693774030aSRichard Henderson     clear_high(d, oprsz, desc);
2703774030aSRichard Henderson }
2713774030aSRichard Henderson 
27222fc3527SRichard Henderson void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
27322fc3527SRichard Henderson {
27422fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
275*6c7ab301SRichard Henderson     uint8_t vecb = (uint8_t)DUP16(b);
27622fc3527SRichard Henderson     intptr_t i;
27722fc3527SRichard Henderson 
278*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
279*6c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * vecb;
28022fc3527SRichard Henderson     }
28122fc3527SRichard Henderson     clear_high(d, oprsz, desc);
28222fc3527SRichard Henderson }
28322fc3527SRichard Henderson 
28422fc3527SRichard Henderson void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
28522fc3527SRichard Henderson {
28622fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
287*6c7ab301SRichard Henderson     uint16_t vecb = (uint16_t)DUP8(b);
28822fc3527SRichard Henderson     intptr_t i;
28922fc3527SRichard Henderson 
290*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
291*6c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * vecb;
29222fc3527SRichard Henderson     }
29322fc3527SRichard Henderson     clear_high(d, oprsz, desc);
29422fc3527SRichard Henderson }
29522fc3527SRichard Henderson 
29622fc3527SRichard Henderson void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
29722fc3527SRichard Henderson {
29822fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
299*6c7ab301SRichard Henderson     uint32_t vecb = (uint32_t)DUP4(b);
30022fc3527SRichard Henderson     intptr_t i;
30122fc3527SRichard Henderson 
302*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
303*6c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * vecb;
30422fc3527SRichard Henderson     }
30522fc3527SRichard Henderson     clear_high(d, oprsz, desc);
30622fc3527SRichard Henderson }
30722fc3527SRichard Henderson 
30822fc3527SRichard Henderson void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
30922fc3527SRichard Henderson {
31022fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
311*6c7ab301SRichard Henderson     uint64_t vecb = (uint64_t)DUP2(b);
31222fc3527SRichard Henderson     intptr_t i;
31322fc3527SRichard Henderson 
314*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
315*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * vecb;
31622fc3527SRichard Henderson     }
31722fc3527SRichard Henderson     clear_high(d, oprsz, desc);
31822fc3527SRichard Henderson }
31922fc3527SRichard Henderson 
320db432672SRichard Henderson void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
321db432672SRichard Henderson {
322db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
323db432672SRichard Henderson     intptr_t i;
324db432672SRichard Henderson 
325*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
326*6c7ab301SRichard Henderson         *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
327db432672SRichard Henderson     }
328db432672SRichard Henderson     clear_high(d, oprsz, desc);
329db432672SRichard Henderson }
330db432672SRichard Henderson 
331db432672SRichard Henderson void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
332db432672SRichard Henderson {
333db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
334db432672SRichard Henderson     intptr_t i;
335db432672SRichard Henderson 
336*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
337*6c7ab301SRichard Henderson         *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
338db432672SRichard Henderson     }
339db432672SRichard Henderson     clear_high(d, oprsz, desc);
340db432672SRichard Henderson }
341db432672SRichard Henderson 
342db432672SRichard Henderson void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
343db432672SRichard Henderson {
344db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
345db432672SRichard Henderson     intptr_t i;
346db432672SRichard Henderson 
347*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
348*6c7ab301SRichard Henderson         *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
349db432672SRichard Henderson     }
350db432672SRichard Henderson     clear_high(d, oprsz, desc);
351db432672SRichard Henderson }
352db432672SRichard Henderson 
353db432672SRichard Henderson void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
354db432672SRichard Henderson {
355db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
356db432672SRichard Henderson     intptr_t i;
357db432672SRichard Henderson 
358*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
359*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
360db432672SRichard Henderson     }
361db432672SRichard Henderson     clear_high(d, oprsz, desc);
362db432672SRichard Henderson }
363db432672SRichard Henderson 
364bcefc902SRichard Henderson void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
365bcefc902SRichard Henderson {
366bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
367bcefc902SRichard Henderson     intptr_t i;
368bcefc902SRichard Henderson 
369bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
370bcefc902SRichard Henderson         int8_t aa = *(int8_t *)(a + i);
371bcefc902SRichard Henderson         *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
372bcefc902SRichard Henderson     }
373bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
374bcefc902SRichard Henderson }
375bcefc902SRichard Henderson 
376bcefc902SRichard Henderson void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
377bcefc902SRichard Henderson {
378bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
379bcefc902SRichard Henderson     intptr_t i;
380bcefc902SRichard Henderson 
381bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
382bcefc902SRichard Henderson         int16_t aa = *(int16_t *)(a + i);
383bcefc902SRichard Henderson         *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
384bcefc902SRichard Henderson     }
385bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
386bcefc902SRichard Henderson }
387bcefc902SRichard Henderson 
388bcefc902SRichard Henderson void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
389bcefc902SRichard Henderson {
390bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
391bcefc902SRichard Henderson     intptr_t i;
392bcefc902SRichard Henderson 
393bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
394bcefc902SRichard Henderson         int32_t aa = *(int32_t *)(a + i);
395bcefc902SRichard Henderson         *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
396bcefc902SRichard Henderson     }
397bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
398bcefc902SRichard Henderson }
399bcefc902SRichard Henderson 
400bcefc902SRichard Henderson void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
401bcefc902SRichard Henderson {
402bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
403bcefc902SRichard Henderson     intptr_t i;
404bcefc902SRichard Henderson 
405bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
406bcefc902SRichard Henderson         int64_t aa = *(int64_t *)(a + i);
407bcefc902SRichard Henderson         *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
408bcefc902SRichard Henderson     }
409bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
410bcefc902SRichard Henderson }
411bcefc902SRichard Henderson 
412db432672SRichard Henderson void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
413db432672SRichard Henderson {
414db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
415db432672SRichard Henderson 
416db432672SRichard Henderson     memcpy(d, a, oprsz);
417db432672SRichard Henderson     clear_high(d, oprsz, desc);
418db432672SRichard Henderson }
419db432672SRichard Henderson 
420db432672SRichard Henderson void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
421db432672SRichard Henderson {
422db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
423db432672SRichard Henderson     intptr_t i;
424db432672SRichard Henderson 
425db432672SRichard Henderson     if (c == 0) {
426db432672SRichard Henderson         oprsz = 0;
427db432672SRichard Henderson     } else {
428db432672SRichard Henderson         for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
429db432672SRichard Henderson             *(uint64_t *)(d + i) = c;
430db432672SRichard Henderson         }
431db432672SRichard Henderson     }
432db432672SRichard Henderson     clear_high(d, oprsz, desc);
433db432672SRichard Henderson }
434db432672SRichard Henderson 
435db432672SRichard Henderson void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
436db432672SRichard Henderson {
437db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
438db432672SRichard Henderson     intptr_t i;
439db432672SRichard Henderson 
440db432672SRichard Henderson     if (c == 0) {
441db432672SRichard Henderson         oprsz = 0;
442db432672SRichard Henderson     } else {
443db432672SRichard Henderson         for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
444db432672SRichard Henderson             *(uint32_t *)(d + i) = c;
445db432672SRichard Henderson         }
446db432672SRichard Henderson     }
447db432672SRichard Henderson     clear_high(d, oprsz, desc);
448db432672SRichard Henderson }
449db432672SRichard Henderson 
450db432672SRichard Henderson void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
451db432672SRichard Henderson {
452db432672SRichard Henderson     HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
453db432672SRichard Henderson }
454db432672SRichard Henderson 
455db432672SRichard Henderson void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
456db432672SRichard Henderson {
457db432672SRichard Henderson     HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
458db432672SRichard Henderson }
459db432672SRichard Henderson 
460db432672SRichard Henderson void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
461db432672SRichard Henderson {
462db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
463db432672SRichard Henderson     intptr_t i;
464db432672SRichard Henderson 
465*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
466*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
467db432672SRichard Henderson     }
468db432672SRichard Henderson     clear_high(d, oprsz, desc);
469db432672SRichard Henderson }
470db432672SRichard Henderson 
471db432672SRichard Henderson void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
472db432672SRichard Henderson {
473db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
474db432672SRichard Henderson     intptr_t i;
475db432672SRichard Henderson 
476*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
477*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
478db432672SRichard Henderson     }
479db432672SRichard Henderson     clear_high(d, oprsz, desc);
480db432672SRichard Henderson }
481db432672SRichard Henderson 
482db432672SRichard Henderson void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
483db432672SRichard Henderson {
484db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
485db432672SRichard Henderson     intptr_t i;
486db432672SRichard Henderson 
487*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
488*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
489db432672SRichard Henderson     }
490db432672SRichard Henderson     clear_high(d, oprsz, desc);
491db432672SRichard Henderson }
492db432672SRichard Henderson 
493db432672SRichard Henderson void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
494db432672SRichard Henderson {
495db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
496db432672SRichard Henderson     intptr_t i;
497db432672SRichard Henderson 
498*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
499*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
500db432672SRichard Henderson     }
501db432672SRichard Henderson     clear_high(d, oprsz, desc);
502db432672SRichard Henderson }
503db432672SRichard Henderson 
504db432672SRichard Henderson void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
505db432672SRichard Henderson {
506db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
507db432672SRichard Henderson     intptr_t i;
508db432672SRichard Henderson 
509*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
510*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
511db432672SRichard Henderson     }
512db432672SRichard Henderson     clear_high(d, oprsz, desc);
513db432672SRichard Henderson }
514db432672SRichard Henderson 
515db432672SRichard Henderson void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
516db432672SRichard Henderson {
517db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
518db432672SRichard Henderson     intptr_t i;
519db432672SRichard Henderson 
520*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
521*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
522db432672SRichard Henderson     }
523db432672SRichard Henderson     clear_high(d, oprsz, desc);
524db432672SRichard Henderson }
525d0ec9796SRichard Henderson 
526f550805dSRichard Henderson void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
527f550805dSRichard Henderson {
528f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
529f550805dSRichard Henderson     intptr_t i;
530f550805dSRichard Henderson 
531*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
532*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
533f550805dSRichard Henderson     }
534f550805dSRichard Henderson     clear_high(d, oprsz, desc);
535f550805dSRichard Henderson }
536f550805dSRichard Henderson 
537f550805dSRichard Henderson void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
538f550805dSRichard Henderson {
539f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
540f550805dSRichard Henderson     intptr_t i;
541f550805dSRichard Henderson 
542*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
543*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
544f550805dSRichard Henderson     }
545f550805dSRichard Henderson     clear_high(d, oprsz, desc);
546f550805dSRichard Henderson }
547f550805dSRichard Henderson 
548f550805dSRichard Henderson void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
549f550805dSRichard Henderson {
550f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
551f550805dSRichard Henderson     intptr_t i;
552f550805dSRichard Henderson 
553*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
554*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
555f550805dSRichard Henderson     }
556f550805dSRichard Henderson     clear_high(d, oprsz, desc);
557f550805dSRichard Henderson }
558f550805dSRichard Henderson 
55922fc3527SRichard Henderson void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
56022fc3527SRichard Henderson {
56122fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
562*6c7ab301SRichard Henderson     uint64_t vecb = (uint64_t)DUP2(b);
56322fc3527SRichard Henderson     intptr_t i;
56422fc3527SRichard Henderson 
565*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
566*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & vecb;
56722fc3527SRichard Henderson     }
56822fc3527SRichard Henderson     clear_high(d, oprsz, desc);
56922fc3527SRichard Henderson }
57022fc3527SRichard Henderson 
57122fc3527SRichard Henderson void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
57222fc3527SRichard Henderson {
57322fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
574*6c7ab301SRichard Henderson     uint64_t vecb = (uint64_t)DUP2(b);
57522fc3527SRichard Henderson     intptr_t i;
57622fc3527SRichard Henderson 
577*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
578*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ vecb;
57922fc3527SRichard Henderson     }
58022fc3527SRichard Henderson     clear_high(d, oprsz, desc);
58122fc3527SRichard Henderson }
58222fc3527SRichard Henderson 
58322fc3527SRichard Henderson void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
58422fc3527SRichard Henderson {
58522fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
586*6c7ab301SRichard Henderson     uint64_t vecb = (uint64_t)DUP2(b);
58722fc3527SRichard Henderson     intptr_t i;
58822fc3527SRichard Henderson 
589*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
590*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | vecb;
59122fc3527SRichard Henderson     }
59222fc3527SRichard Henderson     clear_high(d, oprsz, desc);
59322fc3527SRichard Henderson }
59422fc3527SRichard Henderson 
595d0ec9796SRichard Henderson void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
596d0ec9796SRichard Henderson {
597d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
598d0ec9796SRichard Henderson     int shift = simd_data(desc);
599d0ec9796SRichard Henderson     intptr_t i;
600d0ec9796SRichard Henderson 
601*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
602*6c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
603d0ec9796SRichard Henderson     }
604d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
605d0ec9796SRichard Henderson }
606d0ec9796SRichard Henderson 
607d0ec9796SRichard Henderson void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
608d0ec9796SRichard Henderson {
609d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
610d0ec9796SRichard Henderson     int shift = simd_data(desc);
611d0ec9796SRichard Henderson     intptr_t i;
612d0ec9796SRichard Henderson 
613*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
614*6c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
615d0ec9796SRichard Henderson     }
616d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
617d0ec9796SRichard Henderson }
618d0ec9796SRichard Henderson 
619d0ec9796SRichard Henderson void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
620d0ec9796SRichard Henderson {
621d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
622d0ec9796SRichard Henderson     int shift = simd_data(desc);
623d0ec9796SRichard Henderson     intptr_t i;
624d0ec9796SRichard Henderson 
625*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
626*6c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
627d0ec9796SRichard Henderson     }
628d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
629d0ec9796SRichard Henderson }
630d0ec9796SRichard Henderson 
631d0ec9796SRichard Henderson void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
632d0ec9796SRichard Henderson {
633d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
634d0ec9796SRichard Henderson     int shift = simd_data(desc);
635d0ec9796SRichard Henderson     intptr_t i;
636d0ec9796SRichard Henderson 
637*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
638*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
639d0ec9796SRichard Henderson     }
640d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
641d0ec9796SRichard Henderson }
642d0ec9796SRichard Henderson 
643d0ec9796SRichard Henderson void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
644d0ec9796SRichard Henderson {
645d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
646d0ec9796SRichard Henderson     int shift = simd_data(desc);
647d0ec9796SRichard Henderson     intptr_t i;
648d0ec9796SRichard Henderson 
649*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
650*6c7ab301SRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
651d0ec9796SRichard Henderson     }
652d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
653d0ec9796SRichard Henderson }
654d0ec9796SRichard Henderson 
655d0ec9796SRichard Henderson void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
656d0ec9796SRichard Henderson {
657d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
658d0ec9796SRichard Henderson     int shift = simd_data(desc);
659d0ec9796SRichard Henderson     intptr_t i;
660d0ec9796SRichard Henderson 
661*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
662*6c7ab301SRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
663d0ec9796SRichard Henderson     }
664d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
665d0ec9796SRichard Henderson }
666d0ec9796SRichard Henderson 
667d0ec9796SRichard Henderson void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
668d0ec9796SRichard Henderson {
669d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
670d0ec9796SRichard Henderson     int shift = simd_data(desc);
671d0ec9796SRichard Henderson     intptr_t i;
672d0ec9796SRichard Henderson 
673*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
674*6c7ab301SRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
675d0ec9796SRichard Henderson     }
676d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
677d0ec9796SRichard Henderson }
678d0ec9796SRichard Henderson 
679d0ec9796SRichard Henderson void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
680d0ec9796SRichard Henderson {
681d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
682d0ec9796SRichard Henderson     int shift = simd_data(desc);
683d0ec9796SRichard Henderson     intptr_t i;
684d0ec9796SRichard Henderson 
685*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
686*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
687d0ec9796SRichard Henderson     }
688d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
689d0ec9796SRichard Henderson }
690d0ec9796SRichard Henderson 
691d0ec9796SRichard Henderson void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
692d0ec9796SRichard Henderson {
693d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
694d0ec9796SRichard Henderson     int shift = simd_data(desc);
695d0ec9796SRichard Henderson     intptr_t i;
696d0ec9796SRichard Henderson 
697*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
698*6c7ab301SRichard Henderson         *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
699d0ec9796SRichard Henderson     }
700d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
701d0ec9796SRichard Henderson }
702d0ec9796SRichard Henderson 
703d0ec9796SRichard Henderson void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
704d0ec9796SRichard Henderson {
705d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
706d0ec9796SRichard Henderson     int shift = simd_data(desc);
707d0ec9796SRichard Henderson     intptr_t i;
708d0ec9796SRichard Henderson 
709*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
710*6c7ab301SRichard Henderson         *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
711d0ec9796SRichard Henderson     }
712d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
713d0ec9796SRichard Henderson }
714d0ec9796SRichard Henderson 
715d0ec9796SRichard Henderson void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
716d0ec9796SRichard Henderson {
717d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
718d0ec9796SRichard Henderson     int shift = simd_data(desc);
719d0ec9796SRichard Henderson     intptr_t i;
720d0ec9796SRichard Henderson 
721*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
722*6c7ab301SRichard Henderson         *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
723d0ec9796SRichard Henderson     }
724d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
725d0ec9796SRichard Henderson }
726d0ec9796SRichard Henderson 
727d0ec9796SRichard Henderson void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
728d0ec9796SRichard Henderson {
729d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
730d0ec9796SRichard Henderson     int shift = simd_data(desc);
731d0ec9796SRichard Henderson     intptr_t i;
732d0ec9796SRichard Henderson 
733*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
734*6c7ab301SRichard Henderson         *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
735d0ec9796SRichard Henderson     }
736d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
737d0ec9796SRichard Henderson }
738212be173SRichard Henderson 
7395ee5c14cSRichard Henderson void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
7405ee5c14cSRichard Henderson {
7415ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
7425ee5c14cSRichard Henderson     intptr_t i;
7435ee5c14cSRichard Henderson 
7445ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
7455ee5c14cSRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
7465ee5c14cSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
7475ee5c14cSRichard Henderson     }
7485ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
7495ee5c14cSRichard Henderson }
7505ee5c14cSRichard Henderson 
7515ee5c14cSRichard Henderson void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
7525ee5c14cSRichard Henderson {
7535ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
7545ee5c14cSRichard Henderson     intptr_t i;
7555ee5c14cSRichard Henderson 
7565ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
7575ee5c14cSRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
7585ee5c14cSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
7595ee5c14cSRichard Henderson     }
7605ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
7615ee5c14cSRichard Henderson }
7625ee5c14cSRichard Henderson 
7635ee5c14cSRichard Henderson void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
7645ee5c14cSRichard Henderson {
7655ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
7665ee5c14cSRichard Henderson     intptr_t i;
7675ee5c14cSRichard Henderson 
7685ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
7695ee5c14cSRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
7705ee5c14cSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
7715ee5c14cSRichard Henderson     }
7725ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
7735ee5c14cSRichard Henderson }
7745ee5c14cSRichard Henderson 
7755ee5c14cSRichard Henderson void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
7765ee5c14cSRichard Henderson {
7775ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
7785ee5c14cSRichard Henderson     intptr_t i;
7795ee5c14cSRichard Henderson 
7805ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
7815ee5c14cSRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
7825ee5c14cSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
7835ee5c14cSRichard Henderson     }
7845ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
7855ee5c14cSRichard Henderson }
7865ee5c14cSRichard Henderson 
7875ee5c14cSRichard Henderson void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
7885ee5c14cSRichard Henderson {
7895ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
7905ee5c14cSRichard Henderson     intptr_t i;
7915ee5c14cSRichard Henderson 
7925ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
7935ee5c14cSRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
7945ee5c14cSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
7955ee5c14cSRichard Henderson     }
7965ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
7975ee5c14cSRichard Henderson }
7985ee5c14cSRichard Henderson 
7995ee5c14cSRichard Henderson void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
8005ee5c14cSRichard Henderson {
8015ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8025ee5c14cSRichard Henderson     intptr_t i;
8035ee5c14cSRichard Henderson 
8045ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
8055ee5c14cSRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
8065ee5c14cSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
8075ee5c14cSRichard Henderson     }
8085ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8095ee5c14cSRichard Henderson }
8105ee5c14cSRichard Henderson 
8115ee5c14cSRichard Henderson void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
8125ee5c14cSRichard Henderson {
8135ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8145ee5c14cSRichard Henderson     intptr_t i;
8155ee5c14cSRichard Henderson 
8165ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
8175ee5c14cSRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
8185ee5c14cSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
8195ee5c14cSRichard Henderson     }
8205ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8215ee5c14cSRichard Henderson }
8225ee5c14cSRichard Henderson 
8235ee5c14cSRichard Henderson void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
8245ee5c14cSRichard Henderson {
8255ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8265ee5c14cSRichard Henderson     intptr_t i;
8275ee5c14cSRichard Henderson 
8285ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
8295ee5c14cSRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
8305ee5c14cSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
8315ee5c14cSRichard Henderson     }
8325ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8335ee5c14cSRichard Henderson }
8345ee5c14cSRichard Henderson 
8355ee5c14cSRichard Henderson void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
8365ee5c14cSRichard Henderson {
8375ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8385ee5c14cSRichard Henderson     intptr_t i;
8395ee5c14cSRichard Henderson 
840899f08adSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
8415ee5c14cSRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
8425ee5c14cSRichard Henderson         *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
8435ee5c14cSRichard Henderson     }
8445ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8455ee5c14cSRichard Henderson }
8465ee5c14cSRichard Henderson 
8475ee5c14cSRichard Henderson void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
8485ee5c14cSRichard Henderson {
8495ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8505ee5c14cSRichard Henderson     intptr_t i;
8515ee5c14cSRichard Henderson 
8525ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
8535ee5c14cSRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
8545ee5c14cSRichard Henderson         *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
8555ee5c14cSRichard Henderson     }
8565ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8575ee5c14cSRichard Henderson }
8585ee5c14cSRichard Henderson 
8595ee5c14cSRichard Henderson void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
8605ee5c14cSRichard Henderson {
8615ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8625ee5c14cSRichard Henderson     intptr_t i;
8635ee5c14cSRichard Henderson 
864899f08adSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
8655ee5c14cSRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
8665ee5c14cSRichard Henderson         *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
8675ee5c14cSRichard Henderson     }
8685ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8695ee5c14cSRichard Henderson }
8705ee5c14cSRichard Henderson 
8715ee5c14cSRichard Henderson void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
8725ee5c14cSRichard Henderson {
8735ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8745ee5c14cSRichard Henderson     intptr_t i;
8755ee5c14cSRichard Henderson 
876899f08adSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
8775ee5c14cSRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
8785ee5c14cSRichard Henderson         *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
8795ee5c14cSRichard Henderson     }
8805ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8815ee5c14cSRichard Henderson }
8825ee5c14cSRichard Henderson 
883212be173SRichard Henderson #define DO_CMP0(X)  -(X)
884212be173SRichard Henderson 
885212be173SRichard Henderson #define DO_CMP1(NAME, TYPE, OP)                                            \
886212be173SRichard Henderson void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
887212be173SRichard Henderson {                                                                          \
888212be173SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);                                     \
889212be173SRichard Henderson     intptr_t i;                                                            \
8906cb1d3b8SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(TYPE)) {                            \
891212be173SRichard Henderson         *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i));  \
892212be173SRichard Henderson     }                                                                      \
893212be173SRichard Henderson     clear_high(d, oprsz, desc);                                            \
894212be173SRichard Henderson }
895212be173SRichard Henderson 
896212be173SRichard Henderson #define DO_CMP2(SZ) \
897*6c7ab301SRichard Henderson     DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==)    \
898*6c7ab301SRichard Henderson     DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=)    \
899*6c7ab301SRichard Henderson     DO_CMP1(gvec_lt##SZ, int##SZ##_t, <)      \
900*6c7ab301SRichard Henderson     DO_CMP1(gvec_le##SZ, int##SZ##_t, <=)     \
901*6c7ab301SRichard Henderson     DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <)    \
902*6c7ab301SRichard Henderson     DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
903212be173SRichard Henderson 
904212be173SRichard Henderson DO_CMP2(8)
905212be173SRichard Henderson DO_CMP2(16)
906212be173SRichard Henderson DO_CMP2(32)
907212be173SRichard Henderson DO_CMP2(64)
908212be173SRichard Henderson 
909212be173SRichard Henderson #undef DO_CMP0
910212be173SRichard Henderson #undef DO_CMP1
911212be173SRichard Henderson #undef DO_CMP2
912f49b12c6SRichard Henderson 
913f49b12c6SRichard Henderson void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
914f49b12c6SRichard Henderson {
915f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
916f49b12c6SRichard Henderson     intptr_t i;
917f49b12c6SRichard Henderson 
918f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
919f49b12c6SRichard Henderson         int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
920f49b12c6SRichard Henderson         if (r > INT8_MAX) {
921f49b12c6SRichard Henderson             r = INT8_MAX;
922f49b12c6SRichard Henderson         } else if (r < INT8_MIN) {
923f49b12c6SRichard Henderson             r = INT8_MIN;
924f49b12c6SRichard Henderson         }
925f49b12c6SRichard Henderson         *(int8_t *)(d + i) = r;
926f49b12c6SRichard Henderson     }
927f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
928f49b12c6SRichard Henderson }
929f49b12c6SRichard Henderson 
930f49b12c6SRichard Henderson void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
931f49b12c6SRichard Henderson {
932f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
933f49b12c6SRichard Henderson     intptr_t i;
934f49b12c6SRichard Henderson 
935f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
936f49b12c6SRichard Henderson         int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
937f49b12c6SRichard Henderson         if (r > INT16_MAX) {
938f49b12c6SRichard Henderson             r = INT16_MAX;
939f49b12c6SRichard Henderson         } else if (r < INT16_MIN) {
940f49b12c6SRichard Henderson             r = INT16_MIN;
941f49b12c6SRichard Henderson         }
942f49b12c6SRichard Henderson         *(int16_t *)(d + i) = r;
943f49b12c6SRichard Henderson     }
944f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
945f49b12c6SRichard Henderson }
946f49b12c6SRichard Henderson 
947f49b12c6SRichard Henderson void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
948f49b12c6SRichard Henderson {
949f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
950f49b12c6SRichard Henderson     intptr_t i;
951f49b12c6SRichard Henderson 
952f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
953f49b12c6SRichard Henderson         int32_t ai = *(int32_t *)(a + i);
954f49b12c6SRichard Henderson         int32_t bi = *(int32_t *)(b + i);
955f49b12c6SRichard Henderson         int32_t di = ai + bi;
956f49b12c6SRichard Henderson         if (((di ^ ai) &~ (ai ^ bi)) < 0) {
957f49b12c6SRichard Henderson             /* Signed overflow.  */
958f49b12c6SRichard Henderson             di = (di < 0 ? INT32_MAX : INT32_MIN);
959f49b12c6SRichard Henderson         }
960f49b12c6SRichard Henderson         *(int32_t *)(d + i) = di;
961f49b12c6SRichard Henderson     }
962f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
963f49b12c6SRichard Henderson }
964f49b12c6SRichard Henderson 
965f49b12c6SRichard Henderson void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
966f49b12c6SRichard Henderson {
967f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
968f49b12c6SRichard Henderson     intptr_t i;
969f49b12c6SRichard Henderson 
970f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
971f49b12c6SRichard Henderson         int64_t ai = *(int64_t *)(a + i);
972f49b12c6SRichard Henderson         int64_t bi = *(int64_t *)(b + i);
973f49b12c6SRichard Henderson         int64_t di = ai + bi;
974f49b12c6SRichard Henderson         if (((di ^ ai) &~ (ai ^ bi)) < 0) {
975f49b12c6SRichard Henderson             /* Signed overflow.  */
976f49b12c6SRichard Henderson             di = (di < 0 ? INT64_MAX : INT64_MIN);
977f49b12c6SRichard Henderson         }
978f49b12c6SRichard Henderson         *(int64_t *)(d + i) = di;
979f49b12c6SRichard Henderson     }
980f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
981f49b12c6SRichard Henderson }
982f49b12c6SRichard Henderson 
983f49b12c6SRichard Henderson void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
984f49b12c6SRichard Henderson {
985f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
986f49b12c6SRichard Henderson     intptr_t i;
987f49b12c6SRichard Henderson 
988f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
989f49b12c6SRichard Henderson         int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
990f49b12c6SRichard Henderson         if (r > INT8_MAX) {
991f49b12c6SRichard Henderson             r = INT8_MAX;
992f49b12c6SRichard Henderson         } else if (r < INT8_MIN) {
993f49b12c6SRichard Henderson             r = INT8_MIN;
994f49b12c6SRichard Henderson         }
995f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
996f49b12c6SRichard Henderson     }
997f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
998f49b12c6SRichard Henderson }
999f49b12c6SRichard Henderson 
1000f49b12c6SRichard Henderson void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
1001f49b12c6SRichard Henderson {
1002f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1003f49b12c6SRichard Henderson     intptr_t i;
1004f49b12c6SRichard Henderson 
1005f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1006f49b12c6SRichard Henderson         int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
1007f49b12c6SRichard Henderson         if (r > INT16_MAX) {
1008f49b12c6SRichard Henderson             r = INT16_MAX;
1009f49b12c6SRichard Henderson         } else if (r < INT16_MIN) {
1010f49b12c6SRichard Henderson             r = INT16_MIN;
1011f49b12c6SRichard Henderson         }
1012f49b12c6SRichard Henderson         *(int16_t *)(d + i) = r;
1013f49b12c6SRichard Henderson     }
1014f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1015f49b12c6SRichard Henderson }
1016f49b12c6SRichard Henderson 
1017f49b12c6SRichard Henderson void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
1018f49b12c6SRichard Henderson {
1019f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1020f49b12c6SRichard Henderson     intptr_t i;
1021f49b12c6SRichard Henderson 
1022f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1023f49b12c6SRichard Henderson         int32_t ai = *(int32_t *)(a + i);
1024f49b12c6SRichard Henderson         int32_t bi = *(int32_t *)(b + i);
1025f49b12c6SRichard Henderson         int32_t di = ai - bi;
1026f49b12c6SRichard Henderson         if (((di ^ ai) & (ai ^ bi)) < 0) {
1027f49b12c6SRichard Henderson             /* Signed overflow.  */
1028f49b12c6SRichard Henderson             di = (di < 0 ? INT32_MAX : INT32_MIN);
1029f49b12c6SRichard Henderson         }
1030f49b12c6SRichard Henderson         *(int32_t *)(d + i) = di;
1031f49b12c6SRichard Henderson     }
1032f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1033f49b12c6SRichard Henderson }
1034f49b12c6SRichard Henderson 
1035f49b12c6SRichard Henderson void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
1036f49b12c6SRichard Henderson {
1037f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1038f49b12c6SRichard Henderson     intptr_t i;
1039f49b12c6SRichard Henderson 
1040f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1041f49b12c6SRichard Henderson         int64_t ai = *(int64_t *)(a + i);
1042f49b12c6SRichard Henderson         int64_t bi = *(int64_t *)(b + i);
1043f49b12c6SRichard Henderson         int64_t di = ai - bi;
1044f49b12c6SRichard Henderson         if (((di ^ ai) & (ai ^ bi)) < 0) {
1045f49b12c6SRichard Henderson             /* Signed overflow.  */
1046f49b12c6SRichard Henderson             di = (di < 0 ? INT64_MAX : INT64_MIN);
1047f49b12c6SRichard Henderson         }
1048f49b12c6SRichard Henderson         *(int64_t *)(d + i) = di;
1049f49b12c6SRichard Henderson     }
1050f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1051f49b12c6SRichard Henderson }
1052f49b12c6SRichard Henderson 
1053f49b12c6SRichard Henderson void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
1054f49b12c6SRichard Henderson {
1055f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1056f49b12c6SRichard Henderson     intptr_t i;
1057f49b12c6SRichard Henderson 
1058f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1059f49b12c6SRichard Henderson         unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
1060f49b12c6SRichard Henderson         if (r > UINT8_MAX) {
1061f49b12c6SRichard Henderson             r = UINT8_MAX;
1062f49b12c6SRichard Henderson         }
1063f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
1064f49b12c6SRichard Henderson     }
1065f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1066f49b12c6SRichard Henderson }
1067f49b12c6SRichard Henderson 
1068f49b12c6SRichard Henderson void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
1069f49b12c6SRichard Henderson {
1070f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1071f49b12c6SRichard Henderson     intptr_t i;
1072f49b12c6SRichard Henderson 
1073f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1074f49b12c6SRichard Henderson         unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
1075f49b12c6SRichard Henderson         if (r > UINT16_MAX) {
1076f49b12c6SRichard Henderson             r = UINT16_MAX;
1077f49b12c6SRichard Henderson         }
1078f49b12c6SRichard Henderson         *(uint16_t *)(d + i) = r;
1079f49b12c6SRichard Henderson     }
1080f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1081f49b12c6SRichard Henderson }
1082f49b12c6SRichard Henderson 
1083f49b12c6SRichard Henderson void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
1084f49b12c6SRichard Henderson {
1085f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1086f49b12c6SRichard Henderson     intptr_t i;
1087f49b12c6SRichard Henderson 
1088f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1089f49b12c6SRichard Henderson         uint32_t ai = *(uint32_t *)(a + i);
1090f49b12c6SRichard Henderson         uint32_t bi = *(uint32_t *)(b + i);
1091f49b12c6SRichard Henderson         uint32_t di = ai + bi;
1092f49b12c6SRichard Henderson         if (di < ai) {
1093f49b12c6SRichard Henderson             di = UINT32_MAX;
1094f49b12c6SRichard Henderson         }
1095f49b12c6SRichard Henderson         *(uint32_t *)(d + i) = di;
1096f49b12c6SRichard Henderson     }
1097f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1098f49b12c6SRichard Henderson }
1099f49b12c6SRichard Henderson 
1100f49b12c6SRichard Henderson void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
1101f49b12c6SRichard Henderson {
1102f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1103f49b12c6SRichard Henderson     intptr_t i;
1104f49b12c6SRichard Henderson 
1105f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1106f49b12c6SRichard Henderson         uint64_t ai = *(uint64_t *)(a + i);
1107f49b12c6SRichard Henderson         uint64_t bi = *(uint64_t *)(b + i);
1108f49b12c6SRichard Henderson         uint64_t di = ai + bi;
1109f49b12c6SRichard Henderson         if (di < ai) {
1110f49b12c6SRichard Henderson             di = UINT64_MAX;
1111f49b12c6SRichard Henderson         }
1112f49b12c6SRichard Henderson         *(uint64_t *)(d + i) = di;
1113f49b12c6SRichard Henderson     }
1114f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1115f49b12c6SRichard Henderson }
1116f49b12c6SRichard Henderson 
1117f49b12c6SRichard Henderson void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
1118f49b12c6SRichard Henderson {
1119f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1120f49b12c6SRichard Henderson     intptr_t i;
1121f49b12c6SRichard Henderson 
1122f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1123f49b12c6SRichard Henderson         int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
1124f49b12c6SRichard Henderson         if (r < 0) {
1125f49b12c6SRichard Henderson             r = 0;
1126f49b12c6SRichard Henderson         }
1127f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
1128f49b12c6SRichard Henderson     }
1129f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1130f49b12c6SRichard Henderson }
1131f49b12c6SRichard Henderson 
1132f49b12c6SRichard Henderson void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
1133f49b12c6SRichard Henderson {
1134f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1135f49b12c6SRichard Henderson     intptr_t i;
1136f49b12c6SRichard Henderson 
1137f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1138f49b12c6SRichard Henderson         int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
1139f49b12c6SRichard Henderson         if (r < 0) {
1140f49b12c6SRichard Henderson             r = 0;
1141f49b12c6SRichard Henderson         }
1142f49b12c6SRichard Henderson         *(uint16_t *)(d + i) = r;
1143f49b12c6SRichard Henderson     }
1144f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1145f49b12c6SRichard Henderson }
1146f49b12c6SRichard Henderson 
1147f49b12c6SRichard Henderson void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
1148f49b12c6SRichard Henderson {
1149f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1150f49b12c6SRichard Henderson     intptr_t i;
1151f49b12c6SRichard Henderson 
1152f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1153f49b12c6SRichard Henderson         uint32_t ai = *(uint32_t *)(a + i);
1154f49b12c6SRichard Henderson         uint32_t bi = *(uint32_t *)(b + i);
1155f49b12c6SRichard Henderson         uint32_t di = ai - bi;
1156f49b12c6SRichard Henderson         if (ai < bi) {
1157f49b12c6SRichard Henderson             di = 0;
1158f49b12c6SRichard Henderson         }
1159f49b12c6SRichard Henderson         *(uint32_t *)(d + i) = di;
1160f49b12c6SRichard Henderson     }
1161f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1162f49b12c6SRichard Henderson }
1163f49b12c6SRichard Henderson 
1164f49b12c6SRichard Henderson void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1165f49b12c6SRichard Henderson {
1166f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1167f49b12c6SRichard Henderson     intptr_t i;
1168f49b12c6SRichard Henderson 
1169f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1170f49b12c6SRichard Henderson         uint64_t ai = *(uint64_t *)(a + i);
1171f49b12c6SRichard Henderson         uint64_t bi = *(uint64_t *)(b + i);
1172f49b12c6SRichard Henderson         uint64_t di = ai - bi;
1173f49b12c6SRichard Henderson         if (ai < bi) {
1174f49b12c6SRichard Henderson             di = 0;
1175f49b12c6SRichard Henderson         }
1176f49b12c6SRichard Henderson         *(uint64_t *)(d + i) = di;
1177f49b12c6SRichard Henderson     }
1178f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1179f49b12c6SRichard Henderson }
1180dd0a0fcdSRichard Henderson 
1181dd0a0fcdSRichard Henderson void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1182dd0a0fcdSRichard Henderson {
1183dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1184dd0a0fcdSRichard Henderson     intptr_t i;
1185dd0a0fcdSRichard Henderson 
1186dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1187dd0a0fcdSRichard Henderson         int8_t aa = *(int8_t *)(a + i);
1188dd0a0fcdSRichard Henderson         int8_t bb = *(int8_t *)(b + i);
1189dd0a0fcdSRichard Henderson         int8_t dd = aa < bb ? aa : bb;
1190dd0a0fcdSRichard Henderson         *(int8_t *)(d + i) = dd;
1191dd0a0fcdSRichard Henderson     }
1192dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1193dd0a0fcdSRichard Henderson }
1194dd0a0fcdSRichard Henderson 
1195dd0a0fcdSRichard Henderson void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1196dd0a0fcdSRichard Henderson {
1197dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1198dd0a0fcdSRichard Henderson     intptr_t i;
1199dd0a0fcdSRichard Henderson 
1200dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1201dd0a0fcdSRichard Henderson         int16_t aa = *(int16_t *)(a + i);
1202dd0a0fcdSRichard Henderson         int16_t bb = *(int16_t *)(b + i);
1203dd0a0fcdSRichard Henderson         int16_t dd = aa < bb ? aa : bb;
1204dd0a0fcdSRichard Henderson         *(int16_t *)(d + i) = dd;
1205dd0a0fcdSRichard Henderson     }
1206dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1207dd0a0fcdSRichard Henderson }
1208dd0a0fcdSRichard Henderson 
1209dd0a0fcdSRichard Henderson void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1210dd0a0fcdSRichard Henderson {
1211dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1212dd0a0fcdSRichard Henderson     intptr_t i;
1213dd0a0fcdSRichard Henderson 
1214dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1215dd0a0fcdSRichard Henderson         int32_t aa = *(int32_t *)(a + i);
1216dd0a0fcdSRichard Henderson         int32_t bb = *(int32_t *)(b + i);
1217dd0a0fcdSRichard Henderson         int32_t dd = aa < bb ? aa : bb;
1218dd0a0fcdSRichard Henderson         *(int32_t *)(d + i) = dd;
1219dd0a0fcdSRichard Henderson     }
1220dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1221dd0a0fcdSRichard Henderson }
1222dd0a0fcdSRichard Henderson 
1223dd0a0fcdSRichard Henderson void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1224dd0a0fcdSRichard Henderson {
1225dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1226dd0a0fcdSRichard Henderson     intptr_t i;
1227dd0a0fcdSRichard Henderson 
1228dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1229dd0a0fcdSRichard Henderson         int64_t aa = *(int64_t *)(a + i);
1230dd0a0fcdSRichard Henderson         int64_t bb = *(int64_t *)(b + i);
1231dd0a0fcdSRichard Henderson         int64_t dd = aa < bb ? aa : bb;
1232dd0a0fcdSRichard Henderson         *(int64_t *)(d + i) = dd;
1233dd0a0fcdSRichard Henderson     }
1234dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1235dd0a0fcdSRichard Henderson }
1236dd0a0fcdSRichard Henderson 
1237dd0a0fcdSRichard Henderson void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1238dd0a0fcdSRichard Henderson {
1239dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1240dd0a0fcdSRichard Henderson     intptr_t i;
1241dd0a0fcdSRichard Henderson 
1242dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1243dd0a0fcdSRichard Henderson         int8_t aa = *(int8_t *)(a + i);
1244dd0a0fcdSRichard Henderson         int8_t bb = *(int8_t *)(b + i);
1245dd0a0fcdSRichard Henderson         int8_t dd = aa > bb ? aa : bb;
1246dd0a0fcdSRichard Henderson         *(int8_t *)(d + i) = dd;
1247dd0a0fcdSRichard Henderson     }
1248dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1249dd0a0fcdSRichard Henderson }
1250dd0a0fcdSRichard Henderson 
1251dd0a0fcdSRichard Henderson void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1252dd0a0fcdSRichard Henderson {
1253dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1254dd0a0fcdSRichard Henderson     intptr_t i;
1255dd0a0fcdSRichard Henderson 
1256dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1257dd0a0fcdSRichard Henderson         int16_t aa = *(int16_t *)(a + i);
1258dd0a0fcdSRichard Henderson         int16_t bb = *(int16_t *)(b + i);
1259dd0a0fcdSRichard Henderson         int16_t dd = aa > bb ? aa : bb;
1260dd0a0fcdSRichard Henderson         *(int16_t *)(d + i) = dd;
1261dd0a0fcdSRichard Henderson     }
1262dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1263dd0a0fcdSRichard Henderson }
1264dd0a0fcdSRichard Henderson 
1265dd0a0fcdSRichard Henderson void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1266dd0a0fcdSRichard Henderson {
1267dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1268dd0a0fcdSRichard Henderson     intptr_t i;
1269dd0a0fcdSRichard Henderson 
1270dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1271dd0a0fcdSRichard Henderson         int32_t aa = *(int32_t *)(a + i);
1272dd0a0fcdSRichard Henderson         int32_t bb = *(int32_t *)(b + i);
1273dd0a0fcdSRichard Henderson         int32_t dd = aa > bb ? aa : bb;
1274dd0a0fcdSRichard Henderson         *(int32_t *)(d + i) = dd;
1275dd0a0fcdSRichard Henderson     }
1276dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1277dd0a0fcdSRichard Henderson }
1278dd0a0fcdSRichard Henderson 
1279dd0a0fcdSRichard Henderson void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1280dd0a0fcdSRichard Henderson {
1281dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1282dd0a0fcdSRichard Henderson     intptr_t i;
1283dd0a0fcdSRichard Henderson 
1284dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1285dd0a0fcdSRichard Henderson         int64_t aa = *(int64_t *)(a + i);
1286dd0a0fcdSRichard Henderson         int64_t bb = *(int64_t *)(b + i);
1287dd0a0fcdSRichard Henderson         int64_t dd = aa > bb ? aa : bb;
1288dd0a0fcdSRichard Henderson         *(int64_t *)(d + i) = dd;
1289dd0a0fcdSRichard Henderson     }
1290dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1291dd0a0fcdSRichard Henderson }
1292dd0a0fcdSRichard Henderson 
1293dd0a0fcdSRichard Henderson void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1294dd0a0fcdSRichard Henderson {
1295dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1296dd0a0fcdSRichard Henderson     intptr_t i;
1297dd0a0fcdSRichard Henderson 
1298dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1299dd0a0fcdSRichard Henderson         uint8_t aa = *(uint8_t *)(a + i);
1300dd0a0fcdSRichard Henderson         uint8_t bb = *(uint8_t *)(b + i);
1301dd0a0fcdSRichard Henderson         uint8_t dd = aa < bb ? aa : bb;
1302dd0a0fcdSRichard Henderson         *(uint8_t *)(d + i) = dd;
1303dd0a0fcdSRichard Henderson     }
1304dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1305dd0a0fcdSRichard Henderson }
1306dd0a0fcdSRichard Henderson 
1307dd0a0fcdSRichard Henderson void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1308dd0a0fcdSRichard Henderson {
1309dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1310dd0a0fcdSRichard Henderson     intptr_t i;
1311dd0a0fcdSRichard Henderson 
1312dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1313dd0a0fcdSRichard Henderson         uint16_t aa = *(uint16_t *)(a + i);
1314dd0a0fcdSRichard Henderson         uint16_t bb = *(uint16_t *)(b + i);
1315dd0a0fcdSRichard Henderson         uint16_t dd = aa < bb ? aa : bb;
1316dd0a0fcdSRichard Henderson         *(uint16_t *)(d + i) = dd;
1317dd0a0fcdSRichard Henderson     }
1318dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1319dd0a0fcdSRichard Henderson }
1320dd0a0fcdSRichard Henderson 
1321dd0a0fcdSRichard Henderson void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1322dd0a0fcdSRichard Henderson {
1323dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1324dd0a0fcdSRichard Henderson     intptr_t i;
1325dd0a0fcdSRichard Henderson 
1326dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1327dd0a0fcdSRichard Henderson         uint32_t aa = *(uint32_t *)(a + i);
1328dd0a0fcdSRichard Henderson         uint32_t bb = *(uint32_t *)(b + i);
1329dd0a0fcdSRichard Henderson         uint32_t dd = aa < bb ? aa : bb;
1330dd0a0fcdSRichard Henderson         *(uint32_t *)(d + i) = dd;
1331dd0a0fcdSRichard Henderson     }
1332dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1333dd0a0fcdSRichard Henderson }
1334dd0a0fcdSRichard Henderson 
1335dd0a0fcdSRichard Henderson void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1336dd0a0fcdSRichard Henderson {
1337dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1338dd0a0fcdSRichard Henderson     intptr_t i;
1339dd0a0fcdSRichard Henderson 
1340dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1341dd0a0fcdSRichard Henderson         uint64_t aa = *(uint64_t *)(a + i);
1342dd0a0fcdSRichard Henderson         uint64_t bb = *(uint64_t *)(b + i);
1343dd0a0fcdSRichard Henderson         uint64_t dd = aa < bb ? aa : bb;
1344dd0a0fcdSRichard Henderson         *(uint64_t *)(d + i) = dd;
1345dd0a0fcdSRichard Henderson     }
1346dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1347dd0a0fcdSRichard Henderson }
1348dd0a0fcdSRichard Henderson 
1349dd0a0fcdSRichard Henderson void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1350dd0a0fcdSRichard Henderson {
1351dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1352dd0a0fcdSRichard Henderson     intptr_t i;
1353dd0a0fcdSRichard Henderson 
1354dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1355dd0a0fcdSRichard Henderson         uint8_t aa = *(uint8_t *)(a + i);
1356dd0a0fcdSRichard Henderson         uint8_t bb = *(uint8_t *)(b + i);
1357dd0a0fcdSRichard Henderson         uint8_t dd = aa > bb ? aa : bb;
1358dd0a0fcdSRichard Henderson         *(uint8_t *)(d + i) = dd;
1359dd0a0fcdSRichard Henderson     }
1360dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1361dd0a0fcdSRichard Henderson }
1362dd0a0fcdSRichard Henderson 
1363dd0a0fcdSRichard Henderson void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1364dd0a0fcdSRichard Henderson {
1365dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1366dd0a0fcdSRichard Henderson     intptr_t i;
1367dd0a0fcdSRichard Henderson 
1368dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1369dd0a0fcdSRichard Henderson         uint16_t aa = *(uint16_t *)(a + i);
1370dd0a0fcdSRichard Henderson         uint16_t bb = *(uint16_t *)(b + i);
1371dd0a0fcdSRichard Henderson         uint16_t dd = aa > bb ? aa : bb;
1372dd0a0fcdSRichard Henderson         *(uint16_t *)(d + i) = dd;
1373dd0a0fcdSRichard Henderson     }
1374dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1375dd0a0fcdSRichard Henderson }
1376dd0a0fcdSRichard Henderson 
1377dd0a0fcdSRichard Henderson void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1378dd0a0fcdSRichard Henderson {
1379dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1380dd0a0fcdSRichard Henderson     intptr_t i;
1381dd0a0fcdSRichard Henderson 
1382dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1383dd0a0fcdSRichard Henderson         uint32_t aa = *(uint32_t *)(a + i);
1384dd0a0fcdSRichard Henderson         uint32_t bb = *(uint32_t *)(b + i);
1385dd0a0fcdSRichard Henderson         uint32_t dd = aa > bb ? aa : bb;
1386dd0a0fcdSRichard Henderson         *(uint32_t *)(d + i) = dd;
1387dd0a0fcdSRichard Henderson     }
1388dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1389dd0a0fcdSRichard Henderson }
1390dd0a0fcdSRichard Henderson 
1391dd0a0fcdSRichard Henderson void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1392dd0a0fcdSRichard Henderson {
1393dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1394dd0a0fcdSRichard Henderson     intptr_t i;
1395dd0a0fcdSRichard Henderson 
1396dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1397dd0a0fcdSRichard Henderson         uint64_t aa = *(uint64_t *)(a + i);
1398dd0a0fcdSRichard Henderson         uint64_t bb = *(uint64_t *)(b + i);
1399dd0a0fcdSRichard Henderson         uint64_t dd = aa > bb ? aa : bb;
1400dd0a0fcdSRichard Henderson         *(uint64_t *)(d + i) = dd;
1401dd0a0fcdSRichard Henderson     }
1402dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1403dd0a0fcdSRichard Henderson }
140438dc1294SRichard Henderson 
140538dc1294SRichard Henderson void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
140638dc1294SRichard Henderson {
140738dc1294SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
140838dc1294SRichard Henderson     intptr_t i;
140938dc1294SRichard Henderson 
1410*6c7ab301SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1411*6c7ab301SRichard Henderson         uint64_t aa = *(uint64_t *)(a + i);
1412*6c7ab301SRichard Henderson         uint64_t bb = *(uint64_t *)(b + i);
1413*6c7ab301SRichard Henderson         uint64_t cc = *(uint64_t *)(c + i);
1414*6c7ab301SRichard Henderson         *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
141538dc1294SRichard Henderson     }
141638dc1294SRichard Henderson     clear_high(d, oprsz, desc);
141738dc1294SRichard Henderson }
1418