xref: /qemu/accel/tcg/tcg-runtime-gvec.c (revision 899f08ad1d1231dbbfa67298413f05ed2679fb02)
1db432672SRichard Henderson /*
2db432672SRichard Henderson  * Generic vectorized operation runtime
3db432672SRichard Henderson  *
4db432672SRichard Henderson  * Copyright (c) 2018 Linaro
5db432672SRichard Henderson  *
6db432672SRichard Henderson  * This library is free software; you can redistribute it and/or
7db432672SRichard Henderson  * modify it under the terms of the GNU Lesser General Public
8db432672SRichard Henderson  * License as published by the Free Software Foundation; either
9fb0343d5SThomas Huth  * version 2.1 of the License, or (at your option) any later version.
10db432672SRichard Henderson  *
11db432672SRichard Henderson  * This library is distributed in the hope that it will be useful,
12db432672SRichard Henderson  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13db432672SRichard Henderson  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14db432672SRichard Henderson  * Lesser General Public License for more details.
15db432672SRichard Henderson  *
16db432672SRichard Henderson  * You should have received a copy of the GNU Lesser General Public
17db432672SRichard Henderson  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18db432672SRichard Henderson  */
19db432672SRichard Henderson 
20db432672SRichard Henderson #include "qemu/osdep.h"
21db432672SRichard Henderson #include "qemu/host-utils.h"
22db432672SRichard Henderson #include "cpu.h"
23db432672SRichard Henderson #include "exec/helper-proto.h"
24db432672SRichard Henderson #include "tcg-gvec-desc.h"
25db432672SRichard Henderson 
26db432672SRichard Henderson 
27db432672SRichard Henderson /* Virtually all hosts support 16-byte vectors.  Those that don't can emulate
28db432672SRichard Henderson  * them via GCC's generic vector extension.  This turns out to be simpler and
29db432672SRichard Henderson  * more reliable than getting the compiler to autovectorize.
30db432672SRichard Henderson  *
31db432672SRichard Henderson  * In tcg-op-gvec.c, we asserted that both the size and alignment of the data
32db432672SRichard Henderson  * are multiples of 16.
33db432672SRichard Henderson  *
34db432672SRichard Henderson  * When the compiler does not support all of the operations we require, the
35db432672SRichard Henderson  * loops are written so that we can always fall back on the base types.
36db432672SRichard Henderson  */
#ifdef CONFIG_VECTOR16
/*
 * Vector path: every type is a 16-byte GCC generic vector.  The unsigned
 * and signed flavours are listed together for each lane width.
 */
typedef uint8_t  vec8   __attribute__((vector_size(16)));
typedef int8_t   svec8  __attribute__((vector_size(16)));

typedef uint16_t vec16  __attribute__((vector_size(16)));
typedef int16_t  svec16 __attribute__((vector_size(16)));

typedef uint32_t vec32  __attribute__((vector_size(16)));
typedef int32_t  svec32 __attribute__((vector_size(16)));

typedef uint64_t vec64  __attribute__((vector_size(16)));
typedef int64_t  svec64 __attribute__((vector_size(16)));

/* Brace initializers that repeat X once per lane (2, 4, 8 or 16 lanes). */
#define DUP2(X)   { X, X }
#define DUP4(X)   { X, X, X, X }
#define DUP8(X)   { X, X, X, X, X, X, X, X }
#define DUP16(X)  { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X }
#else
/*
 * Scalar fallback: each "vector" type is just one element of the base
 * type, so loops stepping by sizeof(vecN) walk one element at a time.
 */
typedef uint8_t  vec8;
typedef int8_t   svec8;

typedef uint16_t vec16;
typedef int16_t  svec16;

typedef uint32_t vec32;
typedef int32_t  svec32;

typedef uint64_t vec64;
typedef int64_t  svec64;

/* With a single element there is nothing to replicate. */
#define DUP2(X)   X
#define DUP4(X)   X
#define DUP8(X)   X
#define DUP16(X)  X
#endif /* CONFIG_VECTOR16 */
68db432672SRichard Henderson 
/*
 * Zero the part of the destination that lies beyond the operated-on bytes:
 * offsets [oprsz, simd_maxsz(desc)) of d are written as 0, one uint64_t at
 * a time.  Nothing is written when the maximum size does not exceed oprsz.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    const intptr_t max_bytes = simd_maxsz(desc);
    intptr_t ofs = oprsz;

    if (unlikely(max_bytes > ofs)) {
        /* At least one store is needed here, so a do/while is sufficient. */
        do {
            *(uint64_t *)(d + ofs) = 0;
            ofs += sizeof(uint64_t);
        } while (ofs < max_bytes);
    }
}
80db432672SRichard Henderson 
81db432672SRichard Henderson void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
82db432672SRichard Henderson {
83db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
84db432672SRichard Henderson     intptr_t i;
85db432672SRichard Henderson 
86db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
87db432672SRichard Henderson         *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i);
88db432672SRichard Henderson     }
89db432672SRichard Henderson     clear_high(d, oprsz, desc);
90db432672SRichard Henderson }
91db432672SRichard Henderson 
92db432672SRichard Henderson void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
93db432672SRichard Henderson {
94db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
95db432672SRichard Henderson     intptr_t i;
96db432672SRichard Henderson 
97db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
98db432672SRichard Henderson         *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);
99db432672SRichard Henderson     }
100db432672SRichard Henderson     clear_high(d, oprsz, desc);
101db432672SRichard Henderson }
102db432672SRichard Henderson 
103db432672SRichard Henderson void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
104db432672SRichard Henderson {
105db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
106db432672SRichard Henderson     intptr_t i;
107db432672SRichard Henderson 
108db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
109db432672SRichard Henderson         *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);
110db432672SRichard Henderson     }
111db432672SRichard Henderson     clear_high(d, oprsz, desc);
112db432672SRichard Henderson }
113db432672SRichard Henderson 
114db432672SRichard Henderson void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
115db432672SRichard Henderson {
116db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
117db432672SRichard Henderson     intptr_t i;
118db432672SRichard Henderson 
119db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
120db432672SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);
121db432672SRichard Henderson     }
122db432672SRichard Henderson     clear_high(d, oprsz, desc);
123db432672SRichard Henderson }
124db432672SRichard Henderson 
12522fc3527SRichard Henderson void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
12622fc3527SRichard Henderson {
12722fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
12822fc3527SRichard Henderson     vec8 vecb = (vec8)DUP16(b);
12922fc3527SRichard Henderson     intptr_t i;
13022fc3527SRichard Henderson 
13122fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
13222fc3527SRichard Henderson         *(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb;
13322fc3527SRichard Henderson     }
13422fc3527SRichard Henderson     clear_high(d, oprsz, desc);
13522fc3527SRichard Henderson }
13622fc3527SRichard Henderson 
13722fc3527SRichard Henderson void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
13822fc3527SRichard Henderson {
13922fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
14022fc3527SRichard Henderson     vec16 vecb = (vec16)DUP8(b);
14122fc3527SRichard Henderson     intptr_t i;
14222fc3527SRichard Henderson 
14322fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
14422fc3527SRichard Henderson         *(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb;
14522fc3527SRichard Henderson     }
14622fc3527SRichard Henderson     clear_high(d, oprsz, desc);
14722fc3527SRichard Henderson }
14822fc3527SRichard Henderson 
14922fc3527SRichard Henderson void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
15022fc3527SRichard Henderson {
15122fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
15222fc3527SRichard Henderson     vec32 vecb = (vec32)DUP4(b);
15322fc3527SRichard Henderson     intptr_t i;
15422fc3527SRichard Henderson 
15522fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
15622fc3527SRichard Henderson         *(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb;
15722fc3527SRichard Henderson     }
15822fc3527SRichard Henderson     clear_high(d, oprsz, desc);
15922fc3527SRichard Henderson }
16022fc3527SRichard Henderson 
16122fc3527SRichard Henderson void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
16222fc3527SRichard Henderson {
16322fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
16422fc3527SRichard Henderson     vec64 vecb = (vec64)DUP2(b);
16522fc3527SRichard Henderson     intptr_t i;
16622fc3527SRichard Henderson 
16722fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
16822fc3527SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb;
16922fc3527SRichard Henderson     }
17022fc3527SRichard Henderson     clear_high(d, oprsz, desc);
17122fc3527SRichard Henderson }
17222fc3527SRichard Henderson 
173db432672SRichard Henderson void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
174db432672SRichard Henderson {
175db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
176db432672SRichard Henderson     intptr_t i;
177db432672SRichard Henderson 
178db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
179db432672SRichard Henderson         *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);
180db432672SRichard Henderson     }
181db432672SRichard Henderson     clear_high(d, oprsz, desc);
182db432672SRichard Henderson }
183db432672SRichard Henderson 
184db432672SRichard Henderson void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
185db432672SRichard Henderson {
186db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
187db432672SRichard Henderson     intptr_t i;
188db432672SRichard Henderson 
189db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
190db432672SRichard Henderson         *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);
191db432672SRichard Henderson     }
192db432672SRichard Henderson     clear_high(d, oprsz, desc);
193db432672SRichard Henderson }
194db432672SRichard Henderson 
195db432672SRichard Henderson void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
196db432672SRichard Henderson {
197db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
198db432672SRichard Henderson     intptr_t i;
199db432672SRichard Henderson 
200db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
201db432672SRichard Henderson         *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);
202db432672SRichard Henderson     }
203db432672SRichard Henderson     clear_high(d, oprsz, desc);
204db432672SRichard Henderson }
205db432672SRichard Henderson 
206db432672SRichard Henderson void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
207db432672SRichard Henderson {
208db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
209db432672SRichard Henderson     intptr_t i;
210db432672SRichard Henderson 
211db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
212db432672SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);
213db432672SRichard Henderson     }
214db432672SRichard Henderson     clear_high(d, oprsz, desc);
215db432672SRichard Henderson }
216db432672SRichard Henderson 
21722fc3527SRichard Henderson void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
21822fc3527SRichard Henderson {
21922fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
22022fc3527SRichard Henderson     vec8 vecb = (vec8)DUP16(b);
22122fc3527SRichard Henderson     intptr_t i;
22222fc3527SRichard Henderson 
22322fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
22422fc3527SRichard Henderson         *(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb;
22522fc3527SRichard Henderson     }
22622fc3527SRichard Henderson     clear_high(d, oprsz, desc);
22722fc3527SRichard Henderson }
22822fc3527SRichard Henderson 
22922fc3527SRichard Henderson void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
23022fc3527SRichard Henderson {
23122fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
23222fc3527SRichard Henderson     vec16 vecb = (vec16)DUP8(b);
23322fc3527SRichard Henderson     intptr_t i;
23422fc3527SRichard Henderson 
23522fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
23622fc3527SRichard Henderson         *(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb;
23722fc3527SRichard Henderson     }
23822fc3527SRichard Henderson     clear_high(d, oprsz, desc);
23922fc3527SRichard Henderson }
24022fc3527SRichard Henderson 
24122fc3527SRichard Henderson void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
24222fc3527SRichard Henderson {
24322fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
24422fc3527SRichard Henderson     vec32 vecb = (vec32)DUP4(b);
24522fc3527SRichard Henderson     intptr_t i;
24622fc3527SRichard Henderson 
24722fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
24822fc3527SRichard Henderson         *(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb;
24922fc3527SRichard Henderson     }
25022fc3527SRichard Henderson     clear_high(d, oprsz, desc);
25122fc3527SRichard Henderson }
25222fc3527SRichard Henderson 
25322fc3527SRichard Henderson void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
25422fc3527SRichard Henderson {
25522fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
25622fc3527SRichard Henderson     vec64 vecb = (vec64)DUP2(b);
25722fc3527SRichard Henderson     intptr_t i;
25822fc3527SRichard Henderson 
25922fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
26022fc3527SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb;
26122fc3527SRichard Henderson     }
26222fc3527SRichard Henderson     clear_high(d, oprsz, desc);
26322fc3527SRichard Henderson }
26422fc3527SRichard Henderson 
2653774030aSRichard Henderson void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
2663774030aSRichard Henderson {
2673774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2683774030aSRichard Henderson     intptr_t i;
2693774030aSRichard Henderson 
2703774030aSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
2713774030aSRichard Henderson         *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i);
2723774030aSRichard Henderson     }
2733774030aSRichard Henderson     clear_high(d, oprsz, desc);
2743774030aSRichard Henderson }
2753774030aSRichard Henderson 
2763774030aSRichard Henderson void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
2773774030aSRichard Henderson {
2783774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2793774030aSRichard Henderson     intptr_t i;
2803774030aSRichard Henderson 
2813774030aSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
2823774030aSRichard Henderson         *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i);
2833774030aSRichard Henderson     }
2843774030aSRichard Henderson     clear_high(d, oprsz, desc);
2853774030aSRichard Henderson }
2863774030aSRichard Henderson 
2873774030aSRichard Henderson void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
2883774030aSRichard Henderson {
2893774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2903774030aSRichard Henderson     intptr_t i;
2913774030aSRichard Henderson 
2923774030aSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
2933774030aSRichard Henderson         *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i);
2943774030aSRichard Henderson     }
2953774030aSRichard Henderson     clear_high(d, oprsz, desc);
2963774030aSRichard Henderson }
2973774030aSRichard Henderson 
2983774030aSRichard Henderson void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
2993774030aSRichard Henderson {
3003774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
3013774030aSRichard Henderson     intptr_t i;
3023774030aSRichard Henderson 
3033774030aSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
3043774030aSRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i);
3053774030aSRichard Henderson     }
3063774030aSRichard Henderson     clear_high(d, oprsz, desc);
3073774030aSRichard Henderson }
3083774030aSRichard Henderson 
30922fc3527SRichard Henderson void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
31022fc3527SRichard Henderson {
31122fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
31222fc3527SRichard Henderson     vec8 vecb = (vec8)DUP16(b);
31322fc3527SRichard Henderson     intptr_t i;
31422fc3527SRichard Henderson 
31522fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
31622fc3527SRichard Henderson         *(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb;
31722fc3527SRichard Henderson     }
31822fc3527SRichard Henderson     clear_high(d, oprsz, desc);
31922fc3527SRichard Henderson }
32022fc3527SRichard Henderson 
32122fc3527SRichard Henderson void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
32222fc3527SRichard Henderson {
32322fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
32422fc3527SRichard Henderson     vec16 vecb = (vec16)DUP8(b);
32522fc3527SRichard Henderson     intptr_t i;
32622fc3527SRichard Henderson 
32722fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
32822fc3527SRichard Henderson         *(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb;
32922fc3527SRichard Henderson     }
33022fc3527SRichard Henderson     clear_high(d, oprsz, desc);
33122fc3527SRichard Henderson }
33222fc3527SRichard Henderson 
33322fc3527SRichard Henderson void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
33422fc3527SRichard Henderson {
33522fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
33622fc3527SRichard Henderson     vec32 vecb = (vec32)DUP4(b);
33722fc3527SRichard Henderson     intptr_t i;
33822fc3527SRichard Henderson 
33922fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
34022fc3527SRichard Henderson         *(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb;
34122fc3527SRichard Henderson     }
34222fc3527SRichard Henderson     clear_high(d, oprsz, desc);
34322fc3527SRichard Henderson }
34422fc3527SRichard Henderson 
34522fc3527SRichard Henderson void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
34622fc3527SRichard Henderson {
34722fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
34822fc3527SRichard Henderson     vec64 vecb = (vec64)DUP2(b);
34922fc3527SRichard Henderson     intptr_t i;
35022fc3527SRichard Henderson 
35122fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
35222fc3527SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb;
35322fc3527SRichard Henderson     }
35422fc3527SRichard Henderson     clear_high(d, oprsz, desc);
35522fc3527SRichard Henderson }
35622fc3527SRichard Henderson 
357db432672SRichard Henderson void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
358db432672SRichard Henderson {
359db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
360db432672SRichard Henderson     intptr_t i;
361db432672SRichard Henderson 
362db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
363db432672SRichard Henderson         *(vec8 *)(d + i) = -*(vec8 *)(a + i);
364db432672SRichard Henderson     }
365db432672SRichard Henderson     clear_high(d, oprsz, desc);
366db432672SRichard Henderson }
367db432672SRichard Henderson 
368db432672SRichard Henderson void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
369db432672SRichard Henderson {
370db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
371db432672SRichard Henderson     intptr_t i;
372db432672SRichard Henderson 
373db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
374db432672SRichard Henderson         *(vec16 *)(d + i) = -*(vec16 *)(a + i);
375db432672SRichard Henderson     }
376db432672SRichard Henderson     clear_high(d, oprsz, desc);
377db432672SRichard Henderson }
378db432672SRichard Henderson 
379db432672SRichard Henderson void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
380db432672SRichard Henderson {
381db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
382db432672SRichard Henderson     intptr_t i;
383db432672SRichard Henderson 
384db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
385db432672SRichard Henderson         *(vec32 *)(d + i) = -*(vec32 *)(a + i);
386db432672SRichard Henderson     }
387db432672SRichard Henderson     clear_high(d, oprsz, desc);
388db432672SRichard Henderson }
389db432672SRichard Henderson 
390db432672SRichard Henderson void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
391db432672SRichard Henderson {
392db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
393db432672SRichard Henderson     intptr_t i;
394db432672SRichard Henderson 
395db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
396db432672SRichard Henderson         *(vec64 *)(d + i) = -*(vec64 *)(a + i);
397db432672SRichard Henderson     }
398db432672SRichard Henderson     clear_high(d, oprsz, desc);
399db432672SRichard Henderson }
400db432672SRichard Henderson 
401bcefc902SRichard Henderson void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
402bcefc902SRichard Henderson {
403bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
404bcefc902SRichard Henderson     intptr_t i;
405bcefc902SRichard Henderson 
406bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
407bcefc902SRichard Henderson         int8_t aa = *(int8_t *)(a + i);
408bcefc902SRichard Henderson         *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
409bcefc902SRichard Henderson     }
410bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
411bcefc902SRichard Henderson }
412bcefc902SRichard Henderson 
413bcefc902SRichard Henderson void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
414bcefc902SRichard Henderson {
415bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
416bcefc902SRichard Henderson     intptr_t i;
417bcefc902SRichard Henderson 
418bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
419bcefc902SRichard Henderson         int16_t aa = *(int16_t *)(a + i);
420bcefc902SRichard Henderson         *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
421bcefc902SRichard Henderson     }
422bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
423bcefc902SRichard Henderson }
424bcefc902SRichard Henderson 
425bcefc902SRichard Henderson void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
426bcefc902SRichard Henderson {
427bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
428bcefc902SRichard Henderson     intptr_t i;
429bcefc902SRichard Henderson 
430bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
431bcefc902SRichard Henderson         int32_t aa = *(int32_t *)(a + i);
432bcefc902SRichard Henderson         *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
433bcefc902SRichard Henderson     }
434bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
435bcefc902SRichard Henderson }
436bcefc902SRichard Henderson 
437bcefc902SRichard Henderson void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
438bcefc902SRichard Henderson {
439bcefc902SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
440bcefc902SRichard Henderson     intptr_t i;
441bcefc902SRichard Henderson 
442bcefc902SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
443bcefc902SRichard Henderson         int64_t aa = *(int64_t *)(a + i);
444bcefc902SRichard Henderson         *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
445bcefc902SRichard Henderson     }
446bcefc902SRichard Henderson     clear_high(d, oprsz, desc);
447bcefc902SRichard Henderson }
448bcefc902SRichard Henderson 
449db432672SRichard Henderson void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
450db432672SRichard Henderson {
451db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
452db432672SRichard Henderson 
453db432672SRichard Henderson     memcpy(d, a, oprsz);
454db432672SRichard Henderson     clear_high(d, oprsz, desc);
455db432672SRichard Henderson }
456db432672SRichard Henderson 
457db432672SRichard Henderson void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
458db432672SRichard Henderson {
459db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
460db432672SRichard Henderson     intptr_t i;
461db432672SRichard Henderson 
462db432672SRichard Henderson     if (c == 0) {
463db432672SRichard Henderson         oprsz = 0;
464db432672SRichard Henderson     } else {
465db432672SRichard Henderson         for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
466db432672SRichard Henderson             *(uint64_t *)(d + i) = c;
467db432672SRichard Henderson         }
468db432672SRichard Henderson     }
469db432672SRichard Henderson     clear_high(d, oprsz, desc);
470db432672SRichard Henderson }
471db432672SRichard Henderson 
472db432672SRichard Henderson void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
473db432672SRichard Henderson {
474db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
475db432672SRichard Henderson     intptr_t i;
476db432672SRichard Henderson 
477db432672SRichard Henderson     if (c == 0) {
478db432672SRichard Henderson         oprsz = 0;
479db432672SRichard Henderson     } else {
480db432672SRichard Henderson         for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
481db432672SRichard Henderson             *(uint32_t *)(d + i) = c;
482db432672SRichard Henderson         }
483db432672SRichard Henderson     }
484db432672SRichard Henderson     clear_high(d, oprsz, desc);
485db432672SRichard Henderson }
486db432672SRichard Henderson 
487db432672SRichard Henderson void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
488db432672SRichard Henderson {
489db432672SRichard Henderson     HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
490db432672SRichard Henderson }
491db432672SRichard Henderson 
492db432672SRichard Henderson void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
493db432672SRichard Henderson {
494db432672SRichard Henderson     HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
495db432672SRichard Henderson }
496db432672SRichard Henderson 
497db432672SRichard Henderson void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
498db432672SRichard Henderson {
499db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
500db432672SRichard Henderson     intptr_t i;
501db432672SRichard Henderson 
502db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
503db432672SRichard Henderson         *(vec64 *)(d + i) = ~*(vec64 *)(a + i);
504db432672SRichard Henderson     }
505db432672SRichard Henderson     clear_high(d, oprsz, desc);
506db432672SRichard Henderson }
507db432672SRichard Henderson 
508db432672SRichard Henderson void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
509db432672SRichard Henderson {
510db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
511db432672SRichard Henderson     intptr_t i;
512db432672SRichard Henderson 
513db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
514db432672SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i);
515db432672SRichard Henderson     }
516db432672SRichard Henderson     clear_high(d, oprsz, desc);
517db432672SRichard Henderson }
518db432672SRichard Henderson 
519db432672SRichard Henderson void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
520db432672SRichard Henderson {
521db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
522db432672SRichard Henderson     intptr_t i;
523db432672SRichard Henderson 
524db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
525db432672SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i);
526db432672SRichard Henderson     }
527db432672SRichard Henderson     clear_high(d, oprsz, desc);
528db432672SRichard Henderson }
529db432672SRichard Henderson 
530db432672SRichard Henderson void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
531db432672SRichard Henderson {
532db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
533db432672SRichard Henderson     intptr_t i;
534db432672SRichard Henderson 
535db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
536db432672SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i);
537db432672SRichard Henderson     }
538db432672SRichard Henderson     clear_high(d, oprsz, desc);
539db432672SRichard Henderson }
540db432672SRichard Henderson 
541db432672SRichard Henderson void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
542db432672SRichard Henderson {
543db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
544db432672SRichard Henderson     intptr_t i;
545db432672SRichard Henderson 
546db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
547db432672SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);
548db432672SRichard Henderson     }
549db432672SRichard Henderson     clear_high(d, oprsz, desc);
550db432672SRichard Henderson }
551db432672SRichard Henderson 
552db432672SRichard Henderson void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
553db432672SRichard Henderson {
554db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
555db432672SRichard Henderson     intptr_t i;
556db432672SRichard Henderson 
557db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
558db432672SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i);
559db432672SRichard Henderson     }
560db432672SRichard Henderson     clear_high(d, oprsz, desc);
561db432672SRichard Henderson }
562d0ec9796SRichard Henderson 
563f550805dSRichard Henderson void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
564f550805dSRichard Henderson {
565f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
566f550805dSRichard Henderson     intptr_t i;
567f550805dSRichard Henderson 
568f550805dSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
569f550805dSRichard Henderson         *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) & *(vec64 *)(b + i));
570f550805dSRichard Henderson     }
571f550805dSRichard Henderson     clear_high(d, oprsz, desc);
572f550805dSRichard Henderson }
573f550805dSRichard Henderson 
574f550805dSRichard Henderson void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
575f550805dSRichard Henderson {
576f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
577f550805dSRichard Henderson     intptr_t i;
578f550805dSRichard Henderson 
579f550805dSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
580f550805dSRichard Henderson         *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) | *(vec64 *)(b + i));
581f550805dSRichard Henderson     }
582f550805dSRichard Henderson     clear_high(d, oprsz, desc);
583f550805dSRichard Henderson }
584f550805dSRichard Henderson 
585f550805dSRichard Henderson void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
586f550805dSRichard Henderson {
587f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
588f550805dSRichard Henderson     intptr_t i;
589f550805dSRichard Henderson 
590f550805dSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
591f550805dSRichard Henderson         *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) ^ *(vec64 *)(b + i));
592f550805dSRichard Henderson     }
593f550805dSRichard Henderson     clear_high(d, oprsz, desc);
594f550805dSRichard Henderson }
595f550805dSRichard Henderson 
59622fc3527SRichard Henderson void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
59722fc3527SRichard Henderson {
59822fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
59922fc3527SRichard Henderson     vec64 vecb = (vec64)DUP2(b);
60022fc3527SRichard Henderson     intptr_t i;
60122fc3527SRichard Henderson 
60222fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
60322fc3527SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) & vecb;
60422fc3527SRichard Henderson     }
60522fc3527SRichard Henderson     clear_high(d, oprsz, desc);
60622fc3527SRichard Henderson }
60722fc3527SRichard Henderson 
60822fc3527SRichard Henderson void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
60922fc3527SRichard Henderson {
61022fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
61122fc3527SRichard Henderson     vec64 vecb = (vec64)DUP2(b);
61222fc3527SRichard Henderson     intptr_t i;
61322fc3527SRichard Henderson 
61422fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
61522fc3527SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb;
61622fc3527SRichard Henderson     }
61722fc3527SRichard Henderson     clear_high(d, oprsz, desc);
61822fc3527SRichard Henderson }
61922fc3527SRichard Henderson 
62022fc3527SRichard Henderson void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
62122fc3527SRichard Henderson {
62222fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
62322fc3527SRichard Henderson     vec64 vecb = (vec64)DUP2(b);
62422fc3527SRichard Henderson     intptr_t i;
62522fc3527SRichard Henderson 
62622fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
62722fc3527SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb;
62822fc3527SRichard Henderson     }
62922fc3527SRichard Henderson     clear_high(d, oprsz, desc);
63022fc3527SRichard Henderson }
63122fc3527SRichard Henderson 
632d0ec9796SRichard Henderson void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
633d0ec9796SRichard Henderson {
634d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
635d0ec9796SRichard Henderson     int shift = simd_data(desc);
636d0ec9796SRichard Henderson     intptr_t i;
637d0ec9796SRichard Henderson 
638d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
639d0ec9796SRichard Henderson         *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift;
640d0ec9796SRichard Henderson     }
641d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
642d0ec9796SRichard Henderson }
643d0ec9796SRichard Henderson 
644d0ec9796SRichard Henderson void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
645d0ec9796SRichard Henderson {
646d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
647d0ec9796SRichard Henderson     int shift = simd_data(desc);
648d0ec9796SRichard Henderson     intptr_t i;
649d0ec9796SRichard Henderson 
650d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
651d0ec9796SRichard Henderson         *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift;
652d0ec9796SRichard Henderson     }
653d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
654d0ec9796SRichard Henderson }
655d0ec9796SRichard Henderson 
656d0ec9796SRichard Henderson void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
657d0ec9796SRichard Henderson {
658d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
659d0ec9796SRichard Henderson     int shift = simd_data(desc);
660d0ec9796SRichard Henderson     intptr_t i;
661d0ec9796SRichard Henderson 
662d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
663d0ec9796SRichard Henderson         *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift;
664d0ec9796SRichard Henderson     }
665d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
666d0ec9796SRichard Henderson }
667d0ec9796SRichard Henderson 
668d0ec9796SRichard Henderson void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
669d0ec9796SRichard Henderson {
670d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
671d0ec9796SRichard Henderson     int shift = simd_data(desc);
672d0ec9796SRichard Henderson     intptr_t i;
673d0ec9796SRichard Henderson 
674d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
675d0ec9796SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift;
676d0ec9796SRichard Henderson     }
677d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
678d0ec9796SRichard Henderson }
679d0ec9796SRichard Henderson 
680d0ec9796SRichard Henderson void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
681d0ec9796SRichard Henderson {
682d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
683d0ec9796SRichard Henderson     int shift = simd_data(desc);
684d0ec9796SRichard Henderson     intptr_t i;
685d0ec9796SRichard Henderson 
686d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
687d0ec9796SRichard Henderson         *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift;
688d0ec9796SRichard Henderson     }
689d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
690d0ec9796SRichard Henderson }
691d0ec9796SRichard Henderson 
692d0ec9796SRichard Henderson void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
693d0ec9796SRichard Henderson {
694d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
695d0ec9796SRichard Henderson     int shift = simd_data(desc);
696d0ec9796SRichard Henderson     intptr_t i;
697d0ec9796SRichard Henderson 
698d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
699d0ec9796SRichard Henderson         *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift;
700d0ec9796SRichard Henderson     }
701d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
702d0ec9796SRichard Henderson }
703d0ec9796SRichard Henderson 
704d0ec9796SRichard Henderson void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
705d0ec9796SRichard Henderson {
706d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
707d0ec9796SRichard Henderson     int shift = simd_data(desc);
708d0ec9796SRichard Henderson     intptr_t i;
709d0ec9796SRichard Henderson 
710d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
711d0ec9796SRichard Henderson         *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift;
712d0ec9796SRichard Henderson     }
713d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
714d0ec9796SRichard Henderson }
715d0ec9796SRichard Henderson 
716d0ec9796SRichard Henderson void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
717d0ec9796SRichard Henderson {
718d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
719d0ec9796SRichard Henderson     int shift = simd_data(desc);
720d0ec9796SRichard Henderson     intptr_t i;
721d0ec9796SRichard Henderson 
722d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
723d0ec9796SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift;
724d0ec9796SRichard Henderson     }
725d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
726d0ec9796SRichard Henderson }
727d0ec9796SRichard Henderson 
728d0ec9796SRichard Henderson void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
729d0ec9796SRichard Henderson {
730d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
731d0ec9796SRichard Henderson     int shift = simd_data(desc);
732d0ec9796SRichard Henderson     intptr_t i;
733d0ec9796SRichard Henderson 
734d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
735d0ec9796SRichard Henderson         *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift;
736d0ec9796SRichard Henderson     }
737d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
738d0ec9796SRichard Henderson }
739d0ec9796SRichard Henderson 
740d0ec9796SRichard Henderson void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
741d0ec9796SRichard Henderson {
742d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
743d0ec9796SRichard Henderson     int shift = simd_data(desc);
744d0ec9796SRichard Henderson     intptr_t i;
745d0ec9796SRichard Henderson 
746d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
747d0ec9796SRichard Henderson         *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift;
748d0ec9796SRichard Henderson     }
749d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
750d0ec9796SRichard Henderson }
751d0ec9796SRichard Henderson 
752d0ec9796SRichard Henderson void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
753d0ec9796SRichard Henderson {
754d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
755d0ec9796SRichard Henderson     int shift = simd_data(desc);
756d0ec9796SRichard Henderson     intptr_t i;
757d0ec9796SRichard Henderson 
758d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
759d0ec9796SRichard Henderson         *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift;
760d0ec9796SRichard Henderson     }
761d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
762d0ec9796SRichard Henderson }
763d0ec9796SRichard Henderson 
764d0ec9796SRichard Henderson void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
765d0ec9796SRichard Henderson {
766d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
767d0ec9796SRichard Henderson     int shift = simd_data(desc);
768d0ec9796SRichard Henderson     intptr_t i;
769d0ec9796SRichard Henderson 
770d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
771d0ec9796SRichard Henderson         *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift;
772d0ec9796SRichard Henderson     }
773d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
774d0ec9796SRichard Henderson }
775212be173SRichard Henderson 
7765ee5c14cSRichard Henderson void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
7775ee5c14cSRichard Henderson {
7785ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
7795ee5c14cSRichard Henderson     intptr_t i;
7805ee5c14cSRichard Henderson 
7815ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
7825ee5c14cSRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
7835ee5c14cSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
7845ee5c14cSRichard Henderson     }
7855ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
7865ee5c14cSRichard Henderson }
7875ee5c14cSRichard Henderson 
7885ee5c14cSRichard Henderson void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
7895ee5c14cSRichard Henderson {
7905ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
7915ee5c14cSRichard Henderson     intptr_t i;
7925ee5c14cSRichard Henderson 
7935ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
7945ee5c14cSRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
7955ee5c14cSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
7965ee5c14cSRichard Henderson     }
7975ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
7985ee5c14cSRichard Henderson }
7995ee5c14cSRichard Henderson 
8005ee5c14cSRichard Henderson void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
8015ee5c14cSRichard Henderson {
8025ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8035ee5c14cSRichard Henderson     intptr_t i;
8045ee5c14cSRichard Henderson 
8055ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
8065ee5c14cSRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
8075ee5c14cSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
8085ee5c14cSRichard Henderson     }
8095ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8105ee5c14cSRichard Henderson }
8115ee5c14cSRichard Henderson 
8125ee5c14cSRichard Henderson void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
8135ee5c14cSRichard Henderson {
8145ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8155ee5c14cSRichard Henderson     intptr_t i;
8165ee5c14cSRichard Henderson 
8175ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
8185ee5c14cSRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
8195ee5c14cSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
8205ee5c14cSRichard Henderson     }
8215ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8225ee5c14cSRichard Henderson }
8235ee5c14cSRichard Henderson 
8245ee5c14cSRichard Henderson void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
8255ee5c14cSRichard Henderson {
8265ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8275ee5c14cSRichard Henderson     intptr_t i;
8285ee5c14cSRichard Henderson 
8295ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
8305ee5c14cSRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
8315ee5c14cSRichard Henderson         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
8325ee5c14cSRichard Henderson     }
8335ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8345ee5c14cSRichard Henderson }
8355ee5c14cSRichard Henderson 
8365ee5c14cSRichard Henderson void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
8375ee5c14cSRichard Henderson {
8385ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8395ee5c14cSRichard Henderson     intptr_t i;
8405ee5c14cSRichard Henderson 
8415ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
8425ee5c14cSRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
8435ee5c14cSRichard Henderson         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
8445ee5c14cSRichard Henderson     }
8455ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8465ee5c14cSRichard Henderson }
8475ee5c14cSRichard Henderson 
8485ee5c14cSRichard Henderson void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
8495ee5c14cSRichard Henderson {
8505ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8515ee5c14cSRichard Henderson     intptr_t i;
8525ee5c14cSRichard Henderson 
8535ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
8545ee5c14cSRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
8555ee5c14cSRichard Henderson         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
8565ee5c14cSRichard Henderson     }
8575ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8585ee5c14cSRichard Henderson }
8595ee5c14cSRichard Henderson 
8605ee5c14cSRichard Henderson void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
8615ee5c14cSRichard Henderson {
8625ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8635ee5c14cSRichard Henderson     intptr_t i;
8645ee5c14cSRichard Henderson 
8655ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
8665ee5c14cSRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
8675ee5c14cSRichard Henderson         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
8685ee5c14cSRichard Henderson     }
8695ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8705ee5c14cSRichard Henderson }
8715ee5c14cSRichard Henderson 
8725ee5c14cSRichard Henderson void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
8735ee5c14cSRichard Henderson {
8745ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8755ee5c14cSRichard Henderson     intptr_t i;
8765ee5c14cSRichard Henderson 
877*899f08adSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
8785ee5c14cSRichard Henderson         uint8_t sh = *(uint8_t *)(b + i) & 7;
8795ee5c14cSRichard Henderson         *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
8805ee5c14cSRichard Henderson     }
8815ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8825ee5c14cSRichard Henderson }
8835ee5c14cSRichard Henderson 
8845ee5c14cSRichard Henderson void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
8855ee5c14cSRichard Henderson {
8865ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8875ee5c14cSRichard Henderson     intptr_t i;
8885ee5c14cSRichard Henderson 
8895ee5c14cSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
8905ee5c14cSRichard Henderson         uint8_t sh = *(uint16_t *)(b + i) & 15;
8915ee5c14cSRichard Henderson         *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
8925ee5c14cSRichard Henderson     }
8935ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
8945ee5c14cSRichard Henderson }
8955ee5c14cSRichard Henderson 
8965ee5c14cSRichard Henderson void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
8975ee5c14cSRichard Henderson {
8985ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
8995ee5c14cSRichard Henderson     intptr_t i;
9005ee5c14cSRichard Henderson 
901*899f08adSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
9025ee5c14cSRichard Henderson         uint8_t sh = *(uint32_t *)(b + i) & 31;
9035ee5c14cSRichard Henderson         *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
9045ee5c14cSRichard Henderson     }
9055ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
9065ee5c14cSRichard Henderson }
9075ee5c14cSRichard Henderson 
9085ee5c14cSRichard Henderson void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
9095ee5c14cSRichard Henderson {
9105ee5c14cSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
9115ee5c14cSRichard Henderson     intptr_t i;
9125ee5c14cSRichard Henderson 
913*899f08adSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
9145ee5c14cSRichard Henderson         uint8_t sh = *(uint64_t *)(b + i) & 63;
9155ee5c14cSRichard Henderson         *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
9165ee5c14cSRichard Henderson     }
9175ee5c14cSRichard Henderson     clear_high(d, oprsz, desc);
9185ee5c14cSRichard Henderson }
9195ee5c14cSRichard Henderson 
920212be173SRichard Henderson /* If vectors are enabled, the compiler fills in -1 for true.
921212be173SRichard Henderson    Otherwise, we must take care of this by hand.  */
922212be173SRichard Henderson #ifdef CONFIG_VECTOR16
923212be173SRichard Henderson # define DO_CMP0(X)  X
924212be173SRichard Henderson #else
925212be173SRichard Henderson # define DO_CMP0(X)  -(X)
926212be173SRichard Henderson #endif
927212be173SRichard Henderson 
928212be173SRichard Henderson #define DO_CMP1(NAME, TYPE, OP)                                            \
929212be173SRichard Henderson void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
930212be173SRichard Henderson {                                                                          \
931212be173SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);                                     \
932212be173SRichard Henderson     intptr_t i;                                                            \
9336cb1d3b8SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(TYPE)) {                            \
934212be173SRichard Henderson         *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i));  \
935212be173SRichard Henderson     }                                                                      \
936212be173SRichard Henderson     clear_high(d, oprsz, desc);                                            \
937212be173SRichard Henderson }
938212be173SRichard Henderson 
939212be173SRichard Henderson #define DO_CMP2(SZ) \
940212be173SRichard Henderson     DO_CMP1(gvec_eq##SZ, vec##SZ, ==)    \
941212be173SRichard Henderson     DO_CMP1(gvec_ne##SZ, vec##SZ, !=)    \
942212be173SRichard Henderson     DO_CMP1(gvec_lt##SZ, svec##SZ, <)    \
943212be173SRichard Henderson     DO_CMP1(gvec_le##SZ, svec##SZ, <=)   \
944212be173SRichard Henderson     DO_CMP1(gvec_ltu##SZ, vec##SZ, <)    \
945212be173SRichard Henderson     DO_CMP1(gvec_leu##SZ, vec##SZ, <=)
946212be173SRichard Henderson 
947212be173SRichard Henderson DO_CMP2(8)
948212be173SRichard Henderson DO_CMP2(16)
949212be173SRichard Henderson DO_CMP2(32)
950212be173SRichard Henderson DO_CMP2(64)
951212be173SRichard Henderson 
952212be173SRichard Henderson #undef DO_CMP0
953212be173SRichard Henderson #undef DO_CMP1
954212be173SRichard Henderson #undef DO_CMP2
955f49b12c6SRichard Henderson 
956f49b12c6SRichard Henderson void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
957f49b12c6SRichard Henderson {
958f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
959f49b12c6SRichard Henderson     intptr_t i;
960f49b12c6SRichard Henderson 
961f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
962f49b12c6SRichard Henderson         int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
963f49b12c6SRichard Henderson         if (r > INT8_MAX) {
964f49b12c6SRichard Henderson             r = INT8_MAX;
965f49b12c6SRichard Henderson         } else if (r < INT8_MIN) {
966f49b12c6SRichard Henderson             r = INT8_MIN;
967f49b12c6SRichard Henderson         }
968f49b12c6SRichard Henderson         *(int8_t *)(d + i) = r;
969f49b12c6SRichard Henderson     }
970f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
971f49b12c6SRichard Henderson }
972f49b12c6SRichard Henderson 
973f49b12c6SRichard Henderson void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
974f49b12c6SRichard Henderson {
975f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
976f49b12c6SRichard Henderson     intptr_t i;
977f49b12c6SRichard Henderson 
978f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
979f49b12c6SRichard Henderson         int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
980f49b12c6SRichard Henderson         if (r > INT16_MAX) {
981f49b12c6SRichard Henderson             r = INT16_MAX;
982f49b12c6SRichard Henderson         } else if (r < INT16_MIN) {
983f49b12c6SRichard Henderson             r = INT16_MIN;
984f49b12c6SRichard Henderson         }
985f49b12c6SRichard Henderson         *(int16_t *)(d + i) = r;
986f49b12c6SRichard Henderson     }
987f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
988f49b12c6SRichard Henderson }
989f49b12c6SRichard Henderson 
990f49b12c6SRichard Henderson void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
991f49b12c6SRichard Henderson {
992f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
993f49b12c6SRichard Henderson     intptr_t i;
994f49b12c6SRichard Henderson 
995f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
996f49b12c6SRichard Henderson         int32_t ai = *(int32_t *)(a + i);
997f49b12c6SRichard Henderson         int32_t bi = *(int32_t *)(b + i);
998f49b12c6SRichard Henderson         int32_t di = ai + bi;
999f49b12c6SRichard Henderson         if (((di ^ ai) &~ (ai ^ bi)) < 0) {
1000f49b12c6SRichard Henderson             /* Signed overflow.  */
1001f49b12c6SRichard Henderson             di = (di < 0 ? INT32_MAX : INT32_MIN);
1002f49b12c6SRichard Henderson         }
1003f49b12c6SRichard Henderson         *(int32_t *)(d + i) = di;
1004f49b12c6SRichard Henderson     }
1005f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1006f49b12c6SRichard Henderson }
1007f49b12c6SRichard Henderson 
1008f49b12c6SRichard Henderson void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
1009f49b12c6SRichard Henderson {
1010f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1011f49b12c6SRichard Henderson     intptr_t i;
1012f49b12c6SRichard Henderson 
1013f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1014f49b12c6SRichard Henderson         int64_t ai = *(int64_t *)(a + i);
1015f49b12c6SRichard Henderson         int64_t bi = *(int64_t *)(b + i);
1016f49b12c6SRichard Henderson         int64_t di = ai + bi;
1017f49b12c6SRichard Henderson         if (((di ^ ai) &~ (ai ^ bi)) < 0) {
1018f49b12c6SRichard Henderson             /* Signed overflow.  */
1019f49b12c6SRichard Henderson             di = (di < 0 ? INT64_MAX : INT64_MIN);
1020f49b12c6SRichard Henderson         }
1021f49b12c6SRichard Henderson         *(int64_t *)(d + i) = di;
1022f49b12c6SRichard Henderson     }
1023f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1024f49b12c6SRichard Henderson }
1025f49b12c6SRichard Henderson 
1026f49b12c6SRichard Henderson void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
1027f49b12c6SRichard Henderson {
1028f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1029f49b12c6SRichard Henderson     intptr_t i;
1030f49b12c6SRichard Henderson 
1031f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1032f49b12c6SRichard Henderson         int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
1033f49b12c6SRichard Henderson         if (r > INT8_MAX) {
1034f49b12c6SRichard Henderson             r = INT8_MAX;
1035f49b12c6SRichard Henderson         } else if (r < INT8_MIN) {
1036f49b12c6SRichard Henderson             r = INT8_MIN;
1037f49b12c6SRichard Henderson         }
1038f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
1039f49b12c6SRichard Henderson     }
1040f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1041f49b12c6SRichard Henderson }
1042f49b12c6SRichard Henderson 
1043f49b12c6SRichard Henderson void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
1044f49b12c6SRichard Henderson {
1045f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1046f49b12c6SRichard Henderson     intptr_t i;
1047f49b12c6SRichard Henderson 
1048f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1049f49b12c6SRichard Henderson         int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
1050f49b12c6SRichard Henderson         if (r > INT16_MAX) {
1051f49b12c6SRichard Henderson             r = INT16_MAX;
1052f49b12c6SRichard Henderson         } else if (r < INT16_MIN) {
1053f49b12c6SRichard Henderson             r = INT16_MIN;
1054f49b12c6SRichard Henderson         }
1055f49b12c6SRichard Henderson         *(int16_t *)(d + i) = r;
1056f49b12c6SRichard Henderson     }
1057f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1058f49b12c6SRichard Henderson }
1059f49b12c6SRichard Henderson 
1060f49b12c6SRichard Henderson void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
1061f49b12c6SRichard Henderson {
1062f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1063f49b12c6SRichard Henderson     intptr_t i;
1064f49b12c6SRichard Henderson 
1065f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1066f49b12c6SRichard Henderson         int32_t ai = *(int32_t *)(a + i);
1067f49b12c6SRichard Henderson         int32_t bi = *(int32_t *)(b + i);
1068f49b12c6SRichard Henderson         int32_t di = ai - bi;
1069f49b12c6SRichard Henderson         if (((di ^ ai) & (ai ^ bi)) < 0) {
1070f49b12c6SRichard Henderson             /* Signed overflow.  */
1071f49b12c6SRichard Henderson             di = (di < 0 ? INT32_MAX : INT32_MIN);
1072f49b12c6SRichard Henderson         }
1073f49b12c6SRichard Henderson         *(int32_t *)(d + i) = di;
1074f49b12c6SRichard Henderson     }
1075f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1076f49b12c6SRichard Henderson }
1077f49b12c6SRichard Henderson 
1078f49b12c6SRichard Henderson void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
1079f49b12c6SRichard Henderson {
1080f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1081f49b12c6SRichard Henderson     intptr_t i;
1082f49b12c6SRichard Henderson 
1083f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1084f49b12c6SRichard Henderson         int64_t ai = *(int64_t *)(a + i);
1085f49b12c6SRichard Henderson         int64_t bi = *(int64_t *)(b + i);
1086f49b12c6SRichard Henderson         int64_t di = ai - bi;
1087f49b12c6SRichard Henderson         if (((di ^ ai) & (ai ^ bi)) < 0) {
1088f49b12c6SRichard Henderson             /* Signed overflow.  */
1089f49b12c6SRichard Henderson             di = (di < 0 ? INT64_MAX : INT64_MIN);
1090f49b12c6SRichard Henderson         }
1091f49b12c6SRichard Henderson         *(int64_t *)(d + i) = di;
1092f49b12c6SRichard Henderson     }
1093f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1094f49b12c6SRichard Henderson }
1095f49b12c6SRichard Henderson 
1096f49b12c6SRichard Henderson void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
1097f49b12c6SRichard Henderson {
1098f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1099f49b12c6SRichard Henderson     intptr_t i;
1100f49b12c6SRichard Henderson 
1101f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1102f49b12c6SRichard Henderson         unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
1103f49b12c6SRichard Henderson         if (r > UINT8_MAX) {
1104f49b12c6SRichard Henderson             r = UINT8_MAX;
1105f49b12c6SRichard Henderson         }
1106f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
1107f49b12c6SRichard Henderson     }
1108f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1109f49b12c6SRichard Henderson }
1110f49b12c6SRichard Henderson 
1111f49b12c6SRichard Henderson void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
1112f49b12c6SRichard Henderson {
1113f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1114f49b12c6SRichard Henderson     intptr_t i;
1115f49b12c6SRichard Henderson 
1116f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1117f49b12c6SRichard Henderson         unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
1118f49b12c6SRichard Henderson         if (r > UINT16_MAX) {
1119f49b12c6SRichard Henderson             r = UINT16_MAX;
1120f49b12c6SRichard Henderson         }
1121f49b12c6SRichard Henderson         *(uint16_t *)(d + i) = r;
1122f49b12c6SRichard Henderson     }
1123f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1124f49b12c6SRichard Henderson }
1125f49b12c6SRichard Henderson 
1126f49b12c6SRichard Henderson void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
1127f49b12c6SRichard Henderson {
1128f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1129f49b12c6SRichard Henderson     intptr_t i;
1130f49b12c6SRichard Henderson 
1131f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1132f49b12c6SRichard Henderson         uint32_t ai = *(uint32_t *)(a + i);
1133f49b12c6SRichard Henderson         uint32_t bi = *(uint32_t *)(b + i);
1134f49b12c6SRichard Henderson         uint32_t di = ai + bi;
1135f49b12c6SRichard Henderson         if (di < ai) {
1136f49b12c6SRichard Henderson             di = UINT32_MAX;
1137f49b12c6SRichard Henderson         }
1138f49b12c6SRichard Henderson         *(uint32_t *)(d + i) = di;
1139f49b12c6SRichard Henderson     }
1140f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1141f49b12c6SRichard Henderson }
1142f49b12c6SRichard Henderson 
1143f49b12c6SRichard Henderson void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
1144f49b12c6SRichard Henderson {
1145f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1146f49b12c6SRichard Henderson     intptr_t i;
1147f49b12c6SRichard Henderson 
1148f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1149f49b12c6SRichard Henderson         uint64_t ai = *(uint64_t *)(a + i);
1150f49b12c6SRichard Henderson         uint64_t bi = *(uint64_t *)(b + i);
1151f49b12c6SRichard Henderson         uint64_t di = ai + bi;
1152f49b12c6SRichard Henderson         if (di < ai) {
1153f49b12c6SRichard Henderson             di = UINT64_MAX;
1154f49b12c6SRichard Henderson         }
1155f49b12c6SRichard Henderson         *(uint64_t *)(d + i) = di;
1156f49b12c6SRichard Henderson     }
1157f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1158f49b12c6SRichard Henderson }
1159f49b12c6SRichard Henderson 
1160f49b12c6SRichard Henderson void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
1161f49b12c6SRichard Henderson {
1162f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1163f49b12c6SRichard Henderson     intptr_t i;
1164f49b12c6SRichard Henderson 
1165f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1166f49b12c6SRichard Henderson         int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
1167f49b12c6SRichard Henderson         if (r < 0) {
1168f49b12c6SRichard Henderson             r = 0;
1169f49b12c6SRichard Henderson         }
1170f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
1171f49b12c6SRichard Henderson     }
1172f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1173f49b12c6SRichard Henderson }
1174f49b12c6SRichard Henderson 
1175f49b12c6SRichard Henderson void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
1176f49b12c6SRichard Henderson {
1177f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1178f49b12c6SRichard Henderson     intptr_t i;
1179f49b12c6SRichard Henderson 
1180f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1181f49b12c6SRichard Henderson         int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
1182f49b12c6SRichard Henderson         if (r < 0) {
1183f49b12c6SRichard Henderson             r = 0;
1184f49b12c6SRichard Henderson         }
1185f49b12c6SRichard Henderson         *(uint16_t *)(d + i) = r;
1186f49b12c6SRichard Henderson     }
1187f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1188f49b12c6SRichard Henderson }
1189f49b12c6SRichard Henderson 
1190f49b12c6SRichard Henderson void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
1191f49b12c6SRichard Henderson {
1192f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1193f49b12c6SRichard Henderson     intptr_t i;
1194f49b12c6SRichard Henderson 
1195f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1196f49b12c6SRichard Henderson         uint32_t ai = *(uint32_t *)(a + i);
1197f49b12c6SRichard Henderson         uint32_t bi = *(uint32_t *)(b + i);
1198f49b12c6SRichard Henderson         uint32_t di = ai - bi;
1199f49b12c6SRichard Henderson         if (ai < bi) {
1200f49b12c6SRichard Henderson             di = 0;
1201f49b12c6SRichard Henderson         }
1202f49b12c6SRichard Henderson         *(uint32_t *)(d + i) = di;
1203f49b12c6SRichard Henderson     }
1204f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1205f49b12c6SRichard Henderson }
1206f49b12c6SRichard Henderson 
1207f49b12c6SRichard Henderson void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1208f49b12c6SRichard Henderson {
1209f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1210f49b12c6SRichard Henderson     intptr_t i;
1211f49b12c6SRichard Henderson 
1212f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1213f49b12c6SRichard Henderson         uint64_t ai = *(uint64_t *)(a + i);
1214f49b12c6SRichard Henderson         uint64_t bi = *(uint64_t *)(b + i);
1215f49b12c6SRichard Henderson         uint64_t di = ai - bi;
1216f49b12c6SRichard Henderson         if (ai < bi) {
1217f49b12c6SRichard Henderson             di = 0;
1218f49b12c6SRichard Henderson         }
1219f49b12c6SRichard Henderson         *(uint64_t *)(d + i) = di;
1220f49b12c6SRichard Henderson     }
1221f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1222f49b12c6SRichard Henderson }
1223dd0a0fcdSRichard Henderson 
1224dd0a0fcdSRichard Henderson void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1225dd0a0fcdSRichard Henderson {
1226dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1227dd0a0fcdSRichard Henderson     intptr_t i;
1228dd0a0fcdSRichard Henderson 
1229dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1230dd0a0fcdSRichard Henderson         int8_t aa = *(int8_t *)(a + i);
1231dd0a0fcdSRichard Henderson         int8_t bb = *(int8_t *)(b + i);
1232dd0a0fcdSRichard Henderson         int8_t dd = aa < bb ? aa : bb;
1233dd0a0fcdSRichard Henderson         *(int8_t *)(d + i) = dd;
1234dd0a0fcdSRichard Henderson     }
1235dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1236dd0a0fcdSRichard Henderson }
1237dd0a0fcdSRichard Henderson 
1238dd0a0fcdSRichard Henderson void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1239dd0a0fcdSRichard Henderson {
1240dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1241dd0a0fcdSRichard Henderson     intptr_t i;
1242dd0a0fcdSRichard Henderson 
1243dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1244dd0a0fcdSRichard Henderson         int16_t aa = *(int16_t *)(a + i);
1245dd0a0fcdSRichard Henderson         int16_t bb = *(int16_t *)(b + i);
1246dd0a0fcdSRichard Henderson         int16_t dd = aa < bb ? aa : bb;
1247dd0a0fcdSRichard Henderson         *(int16_t *)(d + i) = dd;
1248dd0a0fcdSRichard Henderson     }
1249dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1250dd0a0fcdSRichard Henderson }
1251dd0a0fcdSRichard Henderson 
1252dd0a0fcdSRichard Henderson void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1253dd0a0fcdSRichard Henderson {
1254dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1255dd0a0fcdSRichard Henderson     intptr_t i;
1256dd0a0fcdSRichard Henderson 
1257dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1258dd0a0fcdSRichard Henderson         int32_t aa = *(int32_t *)(a + i);
1259dd0a0fcdSRichard Henderson         int32_t bb = *(int32_t *)(b + i);
1260dd0a0fcdSRichard Henderson         int32_t dd = aa < bb ? aa : bb;
1261dd0a0fcdSRichard Henderson         *(int32_t *)(d + i) = dd;
1262dd0a0fcdSRichard Henderson     }
1263dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1264dd0a0fcdSRichard Henderson }
1265dd0a0fcdSRichard Henderson 
1266dd0a0fcdSRichard Henderson void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1267dd0a0fcdSRichard Henderson {
1268dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1269dd0a0fcdSRichard Henderson     intptr_t i;
1270dd0a0fcdSRichard Henderson 
1271dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1272dd0a0fcdSRichard Henderson         int64_t aa = *(int64_t *)(a + i);
1273dd0a0fcdSRichard Henderson         int64_t bb = *(int64_t *)(b + i);
1274dd0a0fcdSRichard Henderson         int64_t dd = aa < bb ? aa : bb;
1275dd0a0fcdSRichard Henderson         *(int64_t *)(d + i) = dd;
1276dd0a0fcdSRichard Henderson     }
1277dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1278dd0a0fcdSRichard Henderson }
1279dd0a0fcdSRichard Henderson 
1280dd0a0fcdSRichard Henderson void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1281dd0a0fcdSRichard Henderson {
1282dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1283dd0a0fcdSRichard Henderson     intptr_t i;
1284dd0a0fcdSRichard Henderson 
1285dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1286dd0a0fcdSRichard Henderson         int8_t aa = *(int8_t *)(a + i);
1287dd0a0fcdSRichard Henderson         int8_t bb = *(int8_t *)(b + i);
1288dd0a0fcdSRichard Henderson         int8_t dd = aa > bb ? aa : bb;
1289dd0a0fcdSRichard Henderson         *(int8_t *)(d + i) = dd;
1290dd0a0fcdSRichard Henderson     }
1291dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1292dd0a0fcdSRichard Henderson }
1293dd0a0fcdSRichard Henderson 
1294dd0a0fcdSRichard Henderson void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1295dd0a0fcdSRichard Henderson {
1296dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1297dd0a0fcdSRichard Henderson     intptr_t i;
1298dd0a0fcdSRichard Henderson 
1299dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1300dd0a0fcdSRichard Henderson         int16_t aa = *(int16_t *)(a + i);
1301dd0a0fcdSRichard Henderson         int16_t bb = *(int16_t *)(b + i);
1302dd0a0fcdSRichard Henderson         int16_t dd = aa > bb ? aa : bb;
1303dd0a0fcdSRichard Henderson         *(int16_t *)(d + i) = dd;
1304dd0a0fcdSRichard Henderson     }
1305dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1306dd0a0fcdSRichard Henderson }
1307dd0a0fcdSRichard Henderson 
1308dd0a0fcdSRichard Henderson void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1309dd0a0fcdSRichard Henderson {
1310dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1311dd0a0fcdSRichard Henderson     intptr_t i;
1312dd0a0fcdSRichard Henderson 
1313dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1314dd0a0fcdSRichard Henderson         int32_t aa = *(int32_t *)(a + i);
1315dd0a0fcdSRichard Henderson         int32_t bb = *(int32_t *)(b + i);
1316dd0a0fcdSRichard Henderson         int32_t dd = aa > bb ? aa : bb;
1317dd0a0fcdSRichard Henderson         *(int32_t *)(d + i) = dd;
1318dd0a0fcdSRichard Henderson     }
1319dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1320dd0a0fcdSRichard Henderson }
1321dd0a0fcdSRichard Henderson 
1322dd0a0fcdSRichard Henderson void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1323dd0a0fcdSRichard Henderson {
1324dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1325dd0a0fcdSRichard Henderson     intptr_t i;
1326dd0a0fcdSRichard Henderson 
1327dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1328dd0a0fcdSRichard Henderson         int64_t aa = *(int64_t *)(a + i);
1329dd0a0fcdSRichard Henderson         int64_t bb = *(int64_t *)(b + i);
1330dd0a0fcdSRichard Henderson         int64_t dd = aa > bb ? aa : bb;
1331dd0a0fcdSRichard Henderson         *(int64_t *)(d + i) = dd;
1332dd0a0fcdSRichard Henderson     }
1333dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1334dd0a0fcdSRichard Henderson }
1335dd0a0fcdSRichard Henderson 
1336dd0a0fcdSRichard Henderson void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1337dd0a0fcdSRichard Henderson {
1338dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1339dd0a0fcdSRichard Henderson     intptr_t i;
1340dd0a0fcdSRichard Henderson 
1341dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1342dd0a0fcdSRichard Henderson         uint8_t aa = *(uint8_t *)(a + i);
1343dd0a0fcdSRichard Henderson         uint8_t bb = *(uint8_t *)(b + i);
1344dd0a0fcdSRichard Henderson         uint8_t dd = aa < bb ? aa : bb;
1345dd0a0fcdSRichard Henderson         *(uint8_t *)(d + i) = dd;
1346dd0a0fcdSRichard Henderson     }
1347dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1348dd0a0fcdSRichard Henderson }
1349dd0a0fcdSRichard Henderson 
1350dd0a0fcdSRichard Henderson void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1351dd0a0fcdSRichard Henderson {
1352dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1353dd0a0fcdSRichard Henderson     intptr_t i;
1354dd0a0fcdSRichard Henderson 
1355dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1356dd0a0fcdSRichard Henderson         uint16_t aa = *(uint16_t *)(a + i);
1357dd0a0fcdSRichard Henderson         uint16_t bb = *(uint16_t *)(b + i);
1358dd0a0fcdSRichard Henderson         uint16_t dd = aa < bb ? aa : bb;
1359dd0a0fcdSRichard Henderson         *(uint16_t *)(d + i) = dd;
1360dd0a0fcdSRichard Henderson     }
1361dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1362dd0a0fcdSRichard Henderson }
1363dd0a0fcdSRichard Henderson 
1364dd0a0fcdSRichard Henderson void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1365dd0a0fcdSRichard Henderson {
1366dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1367dd0a0fcdSRichard Henderson     intptr_t i;
1368dd0a0fcdSRichard Henderson 
1369dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1370dd0a0fcdSRichard Henderson         uint32_t aa = *(uint32_t *)(a + i);
1371dd0a0fcdSRichard Henderson         uint32_t bb = *(uint32_t *)(b + i);
1372dd0a0fcdSRichard Henderson         uint32_t dd = aa < bb ? aa : bb;
1373dd0a0fcdSRichard Henderson         *(uint32_t *)(d + i) = dd;
1374dd0a0fcdSRichard Henderson     }
1375dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1376dd0a0fcdSRichard Henderson }
1377dd0a0fcdSRichard Henderson 
1378dd0a0fcdSRichard Henderson void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1379dd0a0fcdSRichard Henderson {
1380dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1381dd0a0fcdSRichard Henderson     intptr_t i;
1382dd0a0fcdSRichard Henderson 
1383dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1384dd0a0fcdSRichard Henderson         uint64_t aa = *(uint64_t *)(a + i);
1385dd0a0fcdSRichard Henderson         uint64_t bb = *(uint64_t *)(b + i);
1386dd0a0fcdSRichard Henderson         uint64_t dd = aa < bb ? aa : bb;
1387dd0a0fcdSRichard Henderson         *(uint64_t *)(d + i) = dd;
1388dd0a0fcdSRichard Henderson     }
1389dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1390dd0a0fcdSRichard Henderson }
1391dd0a0fcdSRichard Henderson 
1392dd0a0fcdSRichard Henderson void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1393dd0a0fcdSRichard Henderson {
1394dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1395dd0a0fcdSRichard Henderson     intptr_t i;
1396dd0a0fcdSRichard Henderson 
1397dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1398dd0a0fcdSRichard Henderson         uint8_t aa = *(uint8_t *)(a + i);
1399dd0a0fcdSRichard Henderson         uint8_t bb = *(uint8_t *)(b + i);
1400dd0a0fcdSRichard Henderson         uint8_t dd = aa > bb ? aa : bb;
1401dd0a0fcdSRichard Henderson         *(uint8_t *)(d + i) = dd;
1402dd0a0fcdSRichard Henderson     }
1403dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1404dd0a0fcdSRichard Henderson }
1405dd0a0fcdSRichard Henderson 
1406dd0a0fcdSRichard Henderson void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1407dd0a0fcdSRichard Henderson {
1408dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1409dd0a0fcdSRichard Henderson     intptr_t i;
1410dd0a0fcdSRichard Henderson 
1411dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1412dd0a0fcdSRichard Henderson         uint16_t aa = *(uint16_t *)(a + i);
1413dd0a0fcdSRichard Henderson         uint16_t bb = *(uint16_t *)(b + i);
1414dd0a0fcdSRichard Henderson         uint16_t dd = aa > bb ? aa : bb;
1415dd0a0fcdSRichard Henderson         *(uint16_t *)(d + i) = dd;
1416dd0a0fcdSRichard Henderson     }
1417dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1418dd0a0fcdSRichard Henderson }
1419dd0a0fcdSRichard Henderson 
1420dd0a0fcdSRichard Henderson void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1421dd0a0fcdSRichard Henderson {
1422dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1423dd0a0fcdSRichard Henderson     intptr_t i;
1424dd0a0fcdSRichard Henderson 
1425dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1426dd0a0fcdSRichard Henderson         uint32_t aa = *(uint32_t *)(a + i);
1427dd0a0fcdSRichard Henderson         uint32_t bb = *(uint32_t *)(b + i);
1428dd0a0fcdSRichard Henderson         uint32_t dd = aa > bb ? aa : bb;
1429dd0a0fcdSRichard Henderson         *(uint32_t *)(d + i) = dd;
1430dd0a0fcdSRichard Henderson     }
1431dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1432dd0a0fcdSRichard Henderson }
1433dd0a0fcdSRichard Henderson 
1434dd0a0fcdSRichard Henderson void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1435dd0a0fcdSRichard Henderson {
1436dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1437dd0a0fcdSRichard Henderson     intptr_t i;
1438dd0a0fcdSRichard Henderson 
1439dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1440dd0a0fcdSRichard Henderson         uint64_t aa = *(uint64_t *)(a + i);
1441dd0a0fcdSRichard Henderson         uint64_t bb = *(uint64_t *)(b + i);
1442dd0a0fcdSRichard Henderson         uint64_t dd = aa > bb ? aa : bb;
1443dd0a0fcdSRichard Henderson         *(uint64_t *)(d + i) = dd;
1444dd0a0fcdSRichard Henderson     }
1445dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1446dd0a0fcdSRichard Henderson }
144738dc1294SRichard Henderson 
144838dc1294SRichard Henderson void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
144938dc1294SRichard Henderson {
145038dc1294SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
145138dc1294SRichard Henderson     intptr_t i;
145238dc1294SRichard Henderson 
145338dc1294SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
145438dc1294SRichard Henderson         vec64 aa = *(vec64 *)(a + i);
145538dc1294SRichard Henderson         vec64 bb = *(vec64 *)(b + i);
145638dc1294SRichard Henderson         vec64 cc = *(vec64 *)(c + i);
145738dc1294SRichard Henderson         *(vec64 *)(d + i) = (bb & aa) | (cc & ~aa);
145838dc1294SRichard Henderson     }
145938dc1294SRichard Henderson     clear_high(d, oprsz, desc);
146038dc1294SRichard Henderson }
1461