xref: /qemu/accel/tcg/tcg-runtime-gvec.c (revision fb0343d5b4dd4b9b9e96e563d913a3e0c709fe4e)
1db432672SRichard Henderson /*
2db432672SRichard Henderson  * Generic vectorized operation runtime
3db432672SRichard Henderson  *
4db432672SRichard Henderson  * Copyright (c) 2018 Linaro
5db432672SRichard Henderson  *
6db432672SRichard Henderson  * This library is free software; you can redistribute it and/or
7db432672SRichard Henderson  * modify it under the terms of the GNU Lesser General Public
8db432672SRichard Henderson  * License as published by the Free Software Foundation; either
9*fb0343d5SThomas Huth  * version 2.1 of the License, or (at your option) any later version.
10db432672SRichard Henderson  *
11db432672SRichard Henderson  * This library is distributed in the hope that it will be useful,
12db432672SRichard Henderson  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13db432672SRichard Henderson  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14db432672SRichard Henderson  * Lesser General Public License for more details.
15db432672SRichard Henderson  *
16db432672SRichard Henderson  * You should have received a copy of the GNU Lesser General Public
17db432672SRichard Henderson  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18db432672SRichard Henderson  */
19db432672SRichard Henderson 
20db432672SRichard Henderson #include "qemu/osdep.h"
21db432672SRichard Henderson #include "qemu/host-utils.h"
22db432672SRichard Henderson #include "cpu.h"
23db432672SRichard Henderson #include "exec/helper-proto.h"
24db432672SRichard Henderson #include "tcg-gvec-desc.h"
25db432672SRichard Henderson 
26db432672SRichard Henderson 
/* Virtually all hosts support 16-byte vectors.  Those that don't can emulate
 * them via GCC's generic vector extension.  This turns out to be simpler and
 * more reliable than getting the compiler to autovectorize.
 *
 * In tcg-op-gvec.c, we asserted that both the size and alignment of the data
 * are multiples of 16.
 *
 * When the compiler does not support all of the operations we require, the
 * loops are written so that we can always fall back on the base types.
 */
#ifdef CONFIG_VECTOR16
/*
 * 16-byte vectors built with the GCC vector_size attribute.  Each lane
 * width gets an unsigned (vecN) and a signed (svecN) flavour.
 */
typedef uint8_t vec8 __attribute__((vector_size(16)));
typedef int8_t svec8 __attribute__((vector_size(16)));

typedef uint16_t vec16 __attribute__((vector_size(16)));
typedef int16_t svec16 __attribute__((vector_size(16)));

typedef uint32_t vec32 __attribute__((vector_size(16)));
typedef int32_t svec32 __attribute__((vector_size(16)));

typedef uint64_t vec64 __attribute__((vector_size(16)));
typedef int64_t svec64 __attribute__((vector_size(16)));

/* Brace initializers repeating X once per lane (2, 4, 8 or 16 lanes). */
#define DUP2(X)   { X, X }
#define DUP4(X)   { X, X, X, X }
#define DUP8(X)   { X, X, X, X, X, X, X, X }
#define DUP16(X)  { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X }
#else
/*
 * Fallback on the base types: every "vector" is a single scalar element,
 * so sizeof(vecN) is the element size and the helper loops advance one
 * element at a time.
 */
typedef uint8_t vec8;
typedef int8_t svec8;

typedef uint16_t vec16;
typedef int16_t svec16;

typedef uint32_t vec32;
typedef int32_t svec32;

typedef uint64_t vec64;
typedef int64_t svec64;

/* With a single lane there is nothing to replicate. */
#define DUP2(X)   X
#define DUP4(X)   X
#define DUP8(X)   X
#define DUP16(X)  X
#endif /* CONFIG_VECTOR16 */
68db432672SRichard Henderson 
/*
 * Zero the tail of a destination buffer.
 *
 * Bytes from offset oprsz up to simd_maxsz(desc) are cleared with 64-bit
 * stores.  Nothing is written when simd_maxsz(desc) does not exceed oprsz.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    const intptr_t max_bytes = simd_maxsz(desc);

    if (unlikely(max_bytes > oprsz)) {
        intptr_t off = oprsz;

        /* The guard above guarantees at least one store is needed. */
        do {
            *(uint64_t *)(d + off) = 0;
            off += sizeof(uint64_t);
        } while (off < max_bytes);
    }
}
80db432672SRichard Henderson 
81db432672SRichard Henderson void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
82db432672SRichard Henderson {
83db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
84db432672SRichard Henderson     intptr_t i;
85db432672SRichard Henderson 
86db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
87db432672SRichard Henderson         *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i);
88db432672SRichard Henderson     }
89db432672SRichard Henderson     clear_high(d, oprsz, desc);
90db432672SRichard Henderson }
91db432672SRichard Henderson 
92db432672SRichard Henderson void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
93db432672SRichard Henderson {
94db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
95db432672SRichard Henderson     intptr_t i;
96db432672SRichard Henderson 
97db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
98db432672SRichard Henderson         *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);
99db432672SRichard Henderson     }
100db432672SRichard Henderson     clear_high(d, oprsz, desc);
101db432672SRichard Henderson }
102db432672SRichard Henderson 
103db432672SRichard Henderson void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
104db432672SRichard Henderson {
105db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
106db432672SRichard Henderson     intptr_t i;
107db432672SRichard Henderson 
108db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
109db432672SRichard Henderson         *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);
110db432672SRichard Henderson     }
111db432672SRichard Henderson     clear_high(d, oprsz, desc);
112db432672SRichard Henderson }
113db432672SRichard Henderson 
114db432672SRichard Henderson void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
115db432672SRichard Henderson {
116db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
117db432672SRichard Henderson     intptr_t i;
118db432672SRichard Henderson 
119db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
120db432672SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);
121db432672SRichard Henderson     }
122db432672SRichard Henderson     clear_high(d, oprsz, desc);
123db432672SRichard Henderson }
124db432672SRichard Henderson 
12522fc3527SRichard Henderson void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
12622fc3527SRichard Henderson {
12722fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
12822fc3527SRichard Henderson     vec8 vecb = (vec8)DUP16(b);
12922fc3527SRichard Henderson     intptr_t i;
13022fc3527SRichard Henderson 
13122fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
13222fc3527SRichard Henderson         *(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb;
13322fc3527SRichard Henderson     }
13422fc3527SRichard Henderson     clear_high(d, oprsz, desc);
13522fc3527SRichard Henderson }
13622fc3527SRichard Henderson 
13722fc3527SRichard Henderson void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
13822fc3527SRichard Henderson {
13922fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
14022fc3527SRichard Henderson     vec16 vecb = (vec16)DUP8(b);
14122fc3527SRichard Henderson     intptr_t i;
14222fc3527SRichard Henderson 
14322fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
14422fc3527SRichard Henderson         *(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb;
14522fc3527SRichard Henderson     }
14622fc3527SRichard Henderson     clear_high(d, oprsz, desc);
14722fc3527SRichard Henderson }
14822fc3527SRichard Henderson 
14922fc3527SRichard Henderson void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
15022fc3527SRichard Henderson {
15122fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
15222fc3527SRichard Henderson     vec32 vecb = (vec32)DUP4(b);
15322fc3527SRichard Henderson     intptr_t i;
15422fc3527SRichard Henderson 
15522fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
15622fc3527SRichard Henderson         *(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb;
15722fc3527SRichard Henderson     }
15822fc3527SRichard Henderson     clear_high(d, oprsz, desc);
15922fc3527SRichard Henderson }
16022fc3527SRichard Henderson 
16122fc3527SRichard Henderson void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
16222fc3527SRichard Henderson {
16322fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
16422fc3527SRichard Henderson     vec64 vecb = (vec64)DUP2(b);
16522fc3527SRichard Henderson     intptr_t i;
16622fc3527SRichard Henderson 
16722fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
16822fc3527SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb;
16922fc3527SRichard Henderson     }
17022fc3527SRichard Henderson     clear_high(d, oprsz, desc);
17122fc3527SRichard Henderson }
17222fc3527SRichard Henderson 
173db432672SRichard Henderson void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
174db432672SRichard Henderson {
175db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
176db432672SRichard Henderson     intptr_t i;
177db432672SRichard Henderson 
178db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
179db432672SRichard Henderson         *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);
180db432672SRichard Henderson     }
181db432672SRichard Henderson     clear_high(d, oprsz, desc);
182db432672SRichard Henderson }
183db432672SRichard Henderson 
184db432672SRichard Henderson void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
185db432672SRichard Henderson {
186db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
187db432672SRichard Henderson     intptr_t i;
188db432672SRichard Henderson 
189db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
190db432672SRichard Henderson         *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);
191db432672SRichard Henderson     }
192db432672SRichard Henderson     clear_high(d, oprsz, desc);
193db432672SRichard Henderson }
194db432672SRichard Henderson 
195db432672SRichard Henderson void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
196db432672SRichard Henderson {
197db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
198db432672SRichard Henderson     intptr_t i;
199db432672SRichard Henderson 
200db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
201db432672SRichard Henderson         *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);
202db432672SRichard Henderson     }
203db432672SRichard Henderson     clear_high(d, oprsz, desc);
204db432672SRichard Henderson }
205db432672SRichard Henderson 
206db432672SRichard Henderson void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
207db432672SRichard Henderson {
208db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
209db432672SRichard Henderson     intptr_t i;
210db432672SRichard Henderson 
211db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
212db432672SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);
213db432672SRichard Henderson     }
214db432672SRichard Henderson     clear_high(d, oprsz, desc);
215db432672SRichard Henderson }
216db432672SRichard Henderson 
21722fc3527SRichard Henderson void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
21822fc3527SRichard Henderson {
21922fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
22022fc3527SRichard Henderson     vec8 vecb = (vec8)DUP16(b);
22122fc3527SRichard Henderson     intptr_t i;
22222fc3527SRichard Henderson 
22322fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
22422fc3527SRichard Henderson         *(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb;
22522fc3527SRichard Henderson     }
22622fc3527SRichard Henderson     clear_high(d, oprsz, desc);
22722fc3527SRichard Henderson }
22822fc3527SRichard Henderson 
22922fc3527SRichard Henderson void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
23022fc3527SRichard Henderson {
23122fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
23222fc3527SRichard Henderson     vec16 vecb = (vec16)DUP8(b);
23322fc3527SRichard Henderson     intptr_t i;
23422fc3527SRichard Henderson 
23522fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
23622fc3527SRichard Henderson         *(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb;
23722fc3527SRichard Henderson     }
23822fc3527SRichard Henderson     clear_high(d, oprsz, desc);
23922fc3527SRichard Henderson }
24022fc3527SRichard Henderson 
24122fc3527SRichard Henderson void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
24222fc3527SRichard Henderson {
24322fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
24422fc3527SRichard Henderson     vec32 vecb = (vec32)DUP4(b);
24522fc3527SRichard Henderson     intptr_t i;
24622fc3527SRichard Henderson 
24722fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
24822fc3527SRichard Henderson         *(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb;
24922fc3527SRichard Henderson     }
25022fc3527SRichard Henderson     clear_high(d, oprsz, desc);
25122fc3527SRichard Henderson }
25222fc3527SRichard Henderson 
25322fc3527SRichard Henderson void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
25422fc3527SRichard Henderson {
25522fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
25622fc3527SRichard Henderson     vec64 vecb = (vec64)DUP2(b);
25722fc3527SRichard Henderson     intptr_t i;
25822fc3527SRichard Henderson 
25922fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
26022fc3527SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb;
26122fc3527SRichard Henderson     }
26222fc3527SRichard Henderson     clear_high(d, oprsz, desc);
26322fc3527SRichard Henderson }
26422fc3527SRichard Henderson 
2653774030aSRichard Henderson void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
2663774030aSRichard Henderson {
2673774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2683774030aSRichard Henderson     intptr_t i;
2693774030aSRichard Henderson 
2703774030aSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
2713774030aSRichard Henderson         *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i);
2723774030aSRichard Henderson     }
2733774030aSRichard Henderson     clear_high(d, oprsz, desc);
2743774030aSRichard Henderson }
2753774030aSRichard Henderson 
2763774030aSRichard Henderson void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
2773774030aSRichard Henderson {
2783774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2793774030aSRichard Henderson     intptr_t i;
2803774030aSRichard Henderson 
2813774030aSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
2823774030aSRichard Henderson         *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i);
2833774030aSRichard Henderson     }
2843774030aSRichard Henderson     clear_high(d, oprsz, desc);
2853774030aSRichard Henderson }
2863774030aSRichard Henderson 
2873774030aSRichard Henderson void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
2883774030aSRichard Henderson {
2893774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
2903774030aSRichard Henderson     intptr_t i;
2913774030aSRichard Henderson 
2923774030aSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
2933774030aSRichard Henderson         *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i);
2943774030aSRichard Henderson     }
2953774030aSRichard Henderson     clear_high(d, oprsz, desc);
2963774030aSRichard Henderson }
2973774030aSRichard Henderson 
2983774030aSRichard Henderson void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
2993774030aSRichard Henderson {
3003774030aSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
3013774030aSRichard Henderson     intptr_t i;
3023774030aSRichard Henderson 
3033774030aSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
3043774030aSRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i);
3053774030aSRichard Henderson     }
3063774030aSRichard Henderson     clear_high(d, oprsz, desc);
3073774030aSRichard Henderson }
3083774030aSRichard Henderson 
30922fc3527SRichard Henderson void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
31022fc3527SRichard Henderson {
31122fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
31222fc3527SRichard Henderson     vec8 vecb = (vec8)DUP16(b);
31322fc3527SRichard Henderson     intptr_t i;
31422fc3527SRichard Henderson 
31522fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
31622fc3527SRichard Henderson         *(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb;
31722fc3527SRichard Henderson     }
31822fc3527SRichard Henderson     clear_high(d, oprsz, desc);
31922fc3527SRichard Henderson }
32022fc3527SRichard Henderson 
32122fc3527SRichard Henderson void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
32222fc3527SRichard Henderson {
32322fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
32422fc3527SRichard Henderson     vec16 vecb = (vec16)DUP8(b);
32522fc3527SRichard Henderson     intptr_t i;
32622fc3527SRichard Henderson 
32722fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
32822fc3527SRichard Henderson         *(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb;
32922fc3527SRichard Henderson     }
33022fc3527SRichard Henderson     clear_high(d, oprsz, desc);
33122fc3527SRichard Henderson }
33222fc3527SRichard Henderson 
33322fc3527SRichard Henderson void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
33422fc3527SRichard Henderson {
33522fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
33622fc3527SRichard Henderson     vec32 vecb = (vec32)DUP4(b);
33722fc3527SRichard Henderson     intptr_t i;
33822fc3527SRichard Henderson 
33922fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
34022fc3527SRichard Henderson         *(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb;
34122fc3527SRichard Henderson     }
34222fc3527SRichard Henderson     clear_high(d, oprsz, desc);
34322fc3527SRichard Henderson }
34422fc3527SRichard Henderson 
34522fc3527SRichard Henderson void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
34622fc3527SRichard Henderson {
34722fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
34822fc3527SRichard Henderson     vec64 vecb = (vec64)DUP2(b);
34922fc3527SRichard Henderson     intptr_t i;
35022fc3527SRichard Henderson 
35122fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
35222fc3527SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb;
35322fc3527SRichard Henderson     }
35422fc3527SRichard Henderson     clear_high(d, oprsz, desc);
35522fc3527SRichard Henderson }
35622fc3527SRichard Henderson 
357db432672SRichard Henderson void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
358db432672SRichard Henderson {
359db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
360db432672SRichard Henderson     intptr_t i;
361db432672SRichard Henderson 
362db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
363db432672SRichard Henderson         *(vec8 *)(d + i) = -*(vec8 *)(a + i);
364db432672SRichard Henderson     }
365db432672SRichard Henderson     clear_high(d, oprsz, desc);
366db432672SRichard Henderson }
367db432672SRichard Henderson 
368db432672SRichard Henderson void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
369db432672SRichard Henderson {
370db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
371db432672SRichard Henderson     intptr_t i;
372db432672SRichard Henderson 
373db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
374db432672SRichard Henderson         *(vec16 *)(d + i) = -*(vec16 *)(a + i);
375db432672SRichard Henderson     }
376db432672SRichard Henderson     clear_high(d, oprsz, desc);
377db432672SRichard Henderson }
378db432672SRichard Henderson 
379db432672SRichard Henderson void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
380db432672SRichard Henderson {
381db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
382db432672SRichard Henderson     intptr_t i;
383db432672SRichard Henderson 
384db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
385db432672SRichard Henderson         *(vec32 *)(d + i) = -*(vec32 *)(a + i);
386db432672SRichard Henderson     }
387db432672SRichard Henderson     clear_high(d, oprsz, desc);
388db432672SRichard Henderson }
389db432672SRichard Henderson 
390db432672SRichard Henderson void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
391db432672SRichard Henderson {
392db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
393db432672SRichard Henderson     intptr_t i;
394db432672SRichard Henderson 
395db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
396db432672SRichard Henderson         *(vec64 *)(d + i) = -*(vec64 *)(a + i);
397db432672SRichard Henderson     }
398db432672SRichard Henderson     clear_high(d, oprsz, desc);
399db432672SRichard Henderson }
400db432672SRichard Henderson 
401db432672SRichard Henderson void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
402db432672SRichard Henderson {
403db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
404db432672SRichard Henderson 
405db432672SRichard Henderson     memcpy(d, a, oprsz);
406db432672SRichard Henderson     clear_high(d, oprsz, desc);
407db432672SRichard Henderson }
408db432672SRichard Henderson 
409db432672SRichard Henderson void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
410db432672SRichard Henderson {
411db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
412db432672SRichard Henderson     intptr_t i;
413db432672SRichard Henderson 
414db432672SRichard Henderson     if (c == 0) {
415db432672SRichard Henderson         oprsz = 0;
416db432672SRichard Henderson     } else {
417db432672SRichard Henderson         for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
418db432672SRichard Henderson             *(uint64_t *)(d + i) = c;
419db432672SRichard Henderson         }
420db432672SRichard Henderson     }
421db432672SRichard Henderson     clear_high(d, oprsz, desc);
422db432672SRichard Henderson }
423db432672SRichard Henderson 
424db432672SRichard Henderson void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
425db432672SRichard Henderson {
426db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
427db432672SRichard Henderson     intptr_t i;
428db432672SRichard Henderson 
429db432672SRichard Henderson     if (c == 0) {
430db432672SRichard Henderson         oprsz = 0;
431db432672SRichard Henderson     } else {
432db432672SRichard Henderson         for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
433db432672SRichard Henderson             *(uint32_t *)(d + i) = c;
434db432672SRichard Henderson         }
435db432672SRichard Henderson     }
436db432672SRichard Henderson     clear_high(d, oprsz, desc);
437db432672SRichard Henderson }
438db432672SRichard Henderson 
439db432672SRichard Henderson void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
440db432672SRichard Henderson {
441db432672SRichard Henderson     HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
442db432672SRichard Henderson }
443db432672SRichard Henderson 
444db432672SRichard Henderson void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
445db432672SRichard Henderson {
446db432672SRichard Henderson     HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
447db432672SRichard Henderson }
448db432672SRichard Henderson 
449db432672SRichard Henderson void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
450db432672SRichard Henderson {
451db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
452db432672SRichard Henderson     intptr_t i;
453db432672SRichard Henderson 
454db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
455db432672SRichard Henderson         *(vec64 *)(d + i) = ~*(vec64 *)(a + i);
456db432672SRichard Henderson     }
457db432672SRichard Henderson     clear_high(d, oprsz, desc);
458db432672SRichard Henderson }
459db432672SRichard Henderson 
460db432672SRichard Henderson void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
461db432672SRichard Henderson {
462db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
463db432672SRichard Henderson     intptr_t i;
464db432672SRichard Henderson 
465db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
466db432672SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i);
467db432672SRichard Henderson     }
468db432672SRichard Henderson     clear_high(d, oprsz, desc);
469db432672SRichard Henderson }
470db432672SRichard Henderson 
471db432672SRichard Henderson void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
472db432672SRichard Henderson {
473db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
474db432672SRichard Henderson     intptr_t i;
475db432672SRichard Henderson 
476db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
477db432672SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i);
478db432672SRichard Henderson     }
479db432672SRichard Henderson     clear_high(d, oprsz, desc);
480db432672SRichard Henderson }
481db432672SRichard Henderson 
482db432672SRichard Henderson void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
483db432672SRichard Henderson {
484db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
485db432672SRichard Henderson     intptr_t i;
486db432672SRichard Henderson 
487db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
488db432672SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i);
489db432672SRichard Henderson     }
490db432672SRichard Henderson     clear_high(d, oprsz, desc);
491db432672SRichard Henderson }
492db432672SRichard Henderson 
493db432672SRichard Henderson void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
494db432672SRichard Henderson {
495db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
496db432672SRichard Henderson     intptr_t i;
497db432672SRichard Henderson 
498db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
499db432672SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);
500db432672SRichard Henderson     }
501db432672SRichard Henderson     clear_high(d, oprsz, desc);
502db432672SRichard Henderson }
503db432672SRichard Henderson 
504db432672SRichard Henderson void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
505db432672SRichard Henderson {
506db432672SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
507db432672SRichard Henderson     intptr_t i;
508db432672SRichard Henderson 
509db432672SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
510db432672SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i);
511db432672SRichard Henderson     }
512db432672SRichard Henderson     clear_high(d, oprsz, desc);
513db432672SRichard Henderson }
514d0ec9796SRichard Henderson 
515f550805dSRichard Henderson void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
516f550805dSRichard Henderson {
517f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
518f550805dSRichard Henderson     intptr_t i;
519f550805dSRichard Henderson 
520f550805dSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
521f550805dSRichard Henderson         *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) & *(vec64 *)(b + i));
522f550805dSRichard Henderson     }
523f550805dSRichard Henderson     clear_high(d, oprsz, desc);
524f550805dSRichard Henderson }
525f550805dSRichard Henderson 
526f550805dSRichard Henderson void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
527f550805dSRichard Henderson {
528f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
529f550805dSRichard Henderson     intptr_t i;
530f550805dSRichard Henderson 
531f550805dSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
532f550805dSRichard Henderson         *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) | *(vec64 *)(b + i));
533f550805dSRichard Henderson     }
534f550805dSRichard Henderson     clear_high(d, oprsz, desc);
535f550805dSRichard Henderson }
536f550805dSRichard Henderson 
537f550805dSRichard Henderson void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
538f550805dSRichard Henderson {
539f550805dSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
540f550805dSRichard Henderson     intptr_t i;
541f550805dSRichard Henderson 
542f550805dSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
543f550805dSRichard Henderson         *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) ^ *(vec64 *)(b + i));
544f550805dSRichard Henderson     }
545f550805dSRichard Henderson     clear_high(d, oprsz, desc);
546f550805dSRichard Henderson }
547f550805dSRichard Henderson 
54822fc3527SRichard Henderson void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
54922fc3527SRichard Henderson {
55022fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
55122fc3527SRichard Henderson     vec64 vecb = (vec64)DUP2(b);
55222fc3527SRichard Henderson     intptr_t i;
55322fc3527SRichard Henderson 
55422fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
55522fc3527SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) & vecb;
55622fc3527SRichard Henderson     }
55722fc3527SRichard Henderson     clear_high(d, oprsz, desc);
55822fc3527SRichard Henderson }
55922fc3527SRichard Henderson 
56022fc3527SRichard Henderson void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
56122fc3527SRichard Henderson {
56222fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
56322fc3527SRichard Henderson     vec64 vecb = (vec64)DUP2(b);
56422fc3527SRichard Henderson     intptr_t i;
56522fc3527SRichard Henderson 
56622fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
56722fc3527SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb;
56822fc3527SRichard Henderson     }
56922fc3527SRichard Henderson     clear_high(d, oprsz, desc);
57022fc3527SRichard Henderson }
57122fc3527SRichard Henderson 
57222fc3527SRichard Henderson void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
57322fc3527SRichard Henderson {
57422fc3527SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
57522fc3527SRichard Henderson     vec64 vecb = (vec64)DUP2(b);
57622fc3527SRichard Henderson     intptr_t i;
57722fc3527SRichard Henderson 
57822fc3527SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
57922fc3527SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb;
58022fc3527SRichard Henderson     }
58122fc3527SRichard Henderson     clear_high(d, oprsz, desc);
58222fc3527SRichard Henderson }
58322fc3527SRichard Henderson 
584d0ec9796SRichard Henderson void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
585d0ec9796SRichard Henderson {
586d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
587d0ec9796SRichard Henderson     int shift = simd_data(desc);
588d0ec9796SRichard Henderson     intptr_t i;
589d0ec9796SRichard Henderson 
590d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
591d0ec9796SRichard Henderson         *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift;
592d0ec9796SRichard Henderson     }
593d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
594d0ec9796SRichard Henderson }
595d0ec9796SRichard Henderson 
596d0ec9796SRichard Henderson void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
597d0ec9796SRichard Henderson {
598d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
599d0ec9796SRichard Henderson     int shift = simd_data(desc);
600d0ec9796SRichard Henderson     intptr_t i;
601d0ec9796SRichard Henderson 
602d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
603d0ec9796SRichard Henderson         *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift;
604d0ec9796SRichard Henderson     }
605d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
606d0ec9796SRichard Henderson }
607d0ec9796SRichard Henderson 
608d0ec9796SRichard Henderson void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
609d0ec9796SRichard Henderson {
610d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
611d0ec9796SRichard Henderson     int shift = simd_data(desc);
612d0ec9796SRichard Henderson     intptr_t i;
613d0ec9796SRichard Henderson 
614d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
615d0ec9796SRichard Henderson         *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift;
616d0ec9796SRichard Henderson     }
617d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
618d0ec9796SRichard Henderson }
619d0ec9796SRichard Henderson 
620d0ec9796SRichard Henderson void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
621d0ec9796SRichard Henderson {
622d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
623d0ec9796SRichard Henderson     int shift = simd_data(desc);
624d0ec9796SRichard Henderson     intptr_t i;
625d0ec9796SRichard Henderson 
626d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
627d0ec9796SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift;
628d0ec9796SRichard Henderson     }
629d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
630d0ec9796SRichard Henderson }
631d0ec9796SRichard Henderson 
632d0ec9796SRichard Henderson void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
633d0ec9796SRichard Henderson {
634d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
635d0ec9796SRichard Henderson     int shift = simd_data(desc);
636d0ec9796SRichard Henderson     intptr_t i;
637d0ec9796SRichard Henderson 
638d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
639d0ec9796SRichard Henderson         *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift;
640d0ec9796SRichard Henderson     }
641d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
642d0ec9796SRichard Henderson }
643d0ec9796SRichard Henderson 
644d0ec9796SRichard Henderson void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
645d0ec9796SRichard Henderson {
646d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
647d0ec9796SRichard Henderson     int shift = simd_data(desc);
648d0ec9796SRichard Henderson     intptr_t i;
649d0ec9796SRichard Henderson 
650d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
651d0ec9796SRichard Henderson         *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift;
652d0ec9796SRichard Henderson     }
653d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
654d0ec9796SRichard Henderson }
655d0ec9796SRichard Henderson 
656d0ec9796SRichard Henderson void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
657d0ec9796SRichard Henderson {
658d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
659d0ec9796SRichard Henderson     int shift = simd_data(desc);
660d0ec9796SRichard Henderson     intptr_t i;
661d0ec9796SRichard Henderson 
662d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
663d0ec9796SRichard Henderson         *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift;
664d0ec9796SRichard Henderson     }
665d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
666d0ec9796SRichard Henderson }
667d0ec9796SRichard Henderson 
668d0ec9796SRichard Henderson void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
669d0ec9796SRichard Henderson {
670d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
671d0ec9796SRichard Henderson     int shift = simd_data(desc);
672d0ec9796SRichard Henderson     intptr_t i;
673d0ec9796SRichard Henderson 
674d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
675d0ec9796SRichard Henderson         *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift;
676d0ec9796SRichard Henderson     }
677d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
678d0ec9796SRichard Henderson }
679d0ec9796SRichard Henderson 
680d0ec9796SRichard Henderson void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
681d0ec9796SRichard Henderson {
682d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
683d0ec9796SRichard Henderson     int shift = simd_data(desc);
684d0ec9796SRichard Henderson     intptr_t i;
685d0ec9796SRichard Henderson 
686d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec8)) {
687d0ec9796SRichard Henderson         *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift;
688d0ec9796SRichard Henderson     }
689d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
690d0ec9796SRichard Henderson }
691d0ec9796SRichard Henderson 
692d0ec9796SRichard Henderson void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
693d0ec9796SRichard Henderson {
694d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
695d0ec9796SRichard Henderson     int shift = simd_data(desc);
696d0ec9796SRichard Henderson     intptr_t i;
697d0ec9796SRichard Henderson 
698d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec16)) {
699d0ec9796SRichard Henderson         *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift;
700d0ec9796SRichard Henderson     }
701d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
702d0ec9796SRichard Henderson }
703d0ec9796SRichard Henderson 
704d0ec9796SRichard Henderson void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
705d0ec9796SRichard Henderson {
706d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
707d0ec9796SRichard Henderson     int shift = simd_data(desc);
708d0ec9796SRichard Henderson     intptr_t i;
709d0ec9796SRichard Henderson 
710d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec32)) {
711d0ec9796SRichard Henderson         *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift;
712d0ec9796SRichard Henderson     }
713d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
714d0ec9796SRichard Henderson }
715d0ec9796SRichard Henderson 
716d0ec9796SRichard Henderson void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
717d0ec9796SRichard Henderson {
718d0ec9796SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
719d0ec9796SRichard Henderson     int shift = simd_data(desc);
720d0ec9796SRichard Henderson     intptr_t i;
721d0ec9796SRichard Henderson 
722d0ec9796SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(vec64)) {
723d0ec9796SRichard Henderson         *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift;
724d0ec9796SRichard Henderson     }
725d0ec9796SRichard Henderson     clear_high(d, oprsz, desc);
726d0ec9796SRichard Henderson }
727212be173SRichard Henderson 
728212be173SRichard Henderson /* If vectors are enabled, the compiler fills in -1 for true.
729212be173SRichard Henderson    Otherwise, we must take care of this by hand.  */
730212be173SRichard Henderson #ifdef CONFIG_VECTOR16
731212be173SRichard Henderson # define DO_CMP0(X)  X
732212be173SRichard Henderson #else
733212be173SRichard Henderson # define DO_CMP0(X)  -(X)
734212be173SRichard Henderson #endif
735212be173SRichard Henderson 
736212be173SRichard Henderson #define DO_CMP1(NAME, TYPE, OP)                                            \
737212be173SRichard Henderson void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
738212be173SRichard Henderson {                                                                          \
739212be173SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);                                     \
740212be173SRichard Henderson     intptr_t i;                                                            \
7416cb1d3b8SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(TYPE)) {                            \
742212be173SRichard Henderson         *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i));  \
743212be173SRichard Henderson     }                                                                      \
744212be173SRichard Henderson     clear_high(d, oprsz, desc);                                            \
745212be173SRichard Henderson }
746212be173SRichard Henderson 
747212be173SRichard Henderson #define DO_CMP2(SZ) \
748212be173SRichard Henderson     DO_CMP1(gvec_eq##SZ, vec##SZ, ==)    \
749212be173SRichard Henderson     DO_CMP1(gvec_ne##SZ, vec##SZ, !=)    \
750212be173SRichard Henderson     DO_CMP1(gvec_lt##SZ, svec##SZ, <)    \
751212be173SRichard Henderson     DO_CMP1(gvec_le##SZ, svec##SZ, <=)   \
752212be173SRichard Henderson     DO_CMP1(gvec_ltu##SZ, vec##SZ, <)    \
753212be173SRichard Henderson     DO_CMP1(gvec_leu##SZ, vec##SZ, <=)
754212be173SRichard Henderson 
755212be173SRichard Henderson DO_CMP2(8)
756212be173SRichard Henderson DO_CMP2(16)
757212be173SRichard Henderson DO_CMP2(32)
758212be173SRichard Henderson DO_CMP2(64)
759212be173SRichard Henderson 
760212be173SRichard Henderson #undef DO_CMP0
761212be173SRichard Henderson #undef DO_CMP1
762212be173SRichard Henderson #undef DO_CMP2
763f49b12c6SRichard Henderson 
764f49b12c6SRichard Henderson void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
765f49b12c6SRichard Henderson {
766f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
767f49b12c6SRichard Henderson     intptr_t i;
768f49b12c6SRichard Henderson 
769f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
770f49b12c6SRichard Henderson         int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
771f49b12c6SRichard Henderson         if (r > INT8_MAX) {
772f49b12c6SRichard Henderson             r = INT8_MAX;
773f49b12c6SRichard Henderson         } else if (r < INT8_MIN) {
774f49b12c6SRichard Henderson             r = INT8_MIN;
775f49b12c6SRichard Henderson         }
776f49b12c6SRichard Henderson         *(int8_t *)(d + i) = r;
777f49b12c6SRichard Henderson     }
778f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
779f49b12c6SRichard Henderson }
780f49b12c6SRichard Henderson 
781f49b12c6SRichard Henderson void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
782f49b12c6SRichard Henderson {
783f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
784f49b12c6SRichard Henderson     intptr_t i;
785f49b12c6SRichard Henderson 
786f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
787f49b12c6SRichard Henderson         int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
788f49b12c6SRichard Henderson         if (r > INT16_MAX) {
789f49b12c6SRichard Henderson             r = INT16_MAX;
790f49b12c6SRichard Henderson         } else if (r < INT16_MIN) {
791f49b12c6SRichard Henderson             r = INT16_MIN;
792f49b12c6SRichard Henderson         }
793f49b12c6SRichard Henderson         *(int16_t *)(d + i) = r;
794f49b12c6SRichard Henderson     }
795f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
796f49b12c6SRichard Henderson }
797f49b12c6SRichard Henderson 
798f49b12c6SRichard Henderson void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
799f49b12c6SRichard Henderson {
800f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
801f49b12c6SRichard Henderson     intptr_t i;
802f49b12c6SRichard Henderson 
803f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
804f49b12c6SRichard Henderson         int32_t ai = *(int32_t *)(a + i);
805f49b12c6SRichard Henderson         int32_t bi = *(int32_t *)(b + i);
806f49b12c6SRichard Henderson         int32_t di = ai + bi;
807f49b12c6SRichard Henderson         if (((di ^ ai) &~ (ai ^ bi)) < 0) {
808f49b12c6SRichard Henderson             /* Signed overflow.  */
809f49b12c6SRichard Henderson             di = (di < 0 ? INT32_MAX : INT32_MIN);
810f49b12c6SRichard Henderson         }
811f49b12c6SRichard Henderson         *(int32_t *)(d + i) = di;
812f49b12c6SRichard Henderson     }
813f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
814f49b12c6SRichard Henderson }
815f49b12c6SRichard Henderson 
816f49b12c6SRichard Henderson void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
817f49b12c6SRichard Henderson {
818f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
819f49b12c6SRichard Henderson     intptr_t i;
820f49b12c6SRichard Henderson 
821f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
822f49b12c6SRichard Henderson         int64_t ai = *(int64_t *)(a + i);
823f49b12c6SRichard Henderson         int64_t bi = *(int64_t *)(b + i);
824f49b12c6SRichard Henderson         int64_t di = ai + bi;
825f49b12c6SRichard Henderson         if (((di ^ ai) &~ (ai ^ bi)) < 0) {
826f49b12c6SRichard Henderson             /* Signed overflow.  */
827f49b12c6SRichard Henderson             di = (di < 0 ? INT64_MAX : INT64_MIN);
828f49b12c6SRichard Henderson         }
829f49b12c6SRichard Henderson         *(int64_t *)(d + i) = di;
830f49b12c6SRichard Henderson     }
831f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
832f49b12c6SRichard Henderson }
833f49b12c6SRichard Henderson 
834f49b12c6SRichard Henderson void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
835f49b12c6SRichard Henderson {
836f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
837f49b12c6SRichard Henderson     intptr_t i;
838f49b12c6SRichard Henderson 
839f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
840f49b12c6SRichard Henderson         int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
841f49b12c6SRichard Henderson         if (r > INT8_MAX) {
842f49b12c6SRichard Henderson             r = INT8_MAX;
843f49b12c6SRichard Henderson         } else if (r < INT8_MIN) {
844f49b12c6SRichard Henderson             r = INT8_MIN;
845f49b12c6SRichard Henderson         }
846f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
847f49b12c6SRichard Henderson     }
848f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
849f49b12c6SRichard Henderson }
850f49b12c6SRichard Henderson 
851f49b12c6SRichard Henderson void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
852f49b12c6SRichard Henderson {
853f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
854f49b12c6SRichard Henderson     intptr_t i;
855f49b12c6SRichard Henderson 
856f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
857f49b12c6SRichard Henderson         int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
858f49b12c6SRichard Henderson         if (r > INT16_MAX) {
859f49b12c6SRichard Henderson             r = INT16_MAX;
860f49b12c6SRichard Henderson         } else if (r < INT16_MIN) {
861f49b12c6SRichard Henderson             r = INT16_MIN;
862f49b12c6SRichard Henderson         }
863f49b12c6SRichard Henderson         *(int16_t *)(d + i) = r;
864f49b12c6SRichard Henderson     }
865f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
866f49b12c6SRichard Henderson }
867f49b12c6SRichard Henderson 
868f49b12c6SRichard Henderson void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
869f49b12c6SRichard Henderson {
870f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
871f49b12c6SRichard Henderson     intptr_t i;
872f49b12c6SRichard Henderson 
873f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
874f49b12c6SRichard Henderson         int32_t ai = *(int32_t *)(a + i);
875f49b12c6SRichard Henderson         int32_t bi = *(int32_t *)(b + i);
876f49b12c6SRichard Henderson         int32_t di = ai - bi;
877f49b12c6SRichard Henderson         if (((di ^ ai) & (ai ^ bi)) < 0) {
878f49b12c6SRichard Henderson             /* Signed overflow.  */
879f49b12c6SRichard Henderson             di = (di < 0 ? INT32_MAX : INT32_MIN);
880f49b12c6SRichard Henderson         }
881f49b12c6SRichard Henderson         *(int32_t *)(d + i) = di;
882f49b12c6SRichard Henderson     }
883f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
884f49b12c6SRichard Henderson }
885f49b12c6SRichard Henderson 
886f49b12c6SRichard Henderson void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
887f49b12c6SRichard Henderson {
888f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
889f49b12c6SRichard Henderson     intptr_t i;
890f49b12c6SRichard Henderson 
891f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
892f49b12c6SRichard Henderson         int64_t ai = *(int64_t *)(a + i);
893f49b12c6SRichard Henderson         int64_t bi = *(int64_t *)(b + i);
894f49b12c6SRichard Henderson         int64_t di = ai - bi;
895f49b12c6SRichard Henderson         if (((di ^ ai) & (ai ^ bi)) < 0) {
896f49b12c6SRichard Henderson             /* Signed overflow.  */
897f49b12c6SRichard Henderson             di = (di < 0 ? INT64_MAX : INT64_MIN);
898f49b12c6SRichard Henderson         }
899f49b12c6SRichard Henderson         *(int64_t *)(d + i) = di;
900f49b12c6SRichard Henderson     }
901f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
902f49b12c6SRichard Henderson }
903f49b12c6SRichard Henderson 
904f49b12c6SRichard Henderson void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
905f49b12c6SRichard Henderson {
906f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
907f49b12c6SRichard Henderson     intptr_t i;
908f49b12c6SRichard Henderson 
909f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
910f49b12c6SRichard Henderson         unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
911f49b12c6SRichard Henderson         if (r > UINT8_MAX) {
912f49b12c6SRichard Henderson             r = UINT8_MAX;
913f49b12c6SRichard Henderson         }
914f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
915f49b12c6SRichard Henderson     }
916f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
917f49b12c6SRichard Henderson }
918f49b12c6SRichard Henderson 
919f49b12c6SRichard Henderson void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
920f49b12c6SRichard Henderson {
921f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
922f49b12c6SRichard Henderson     intptr_t i;
923f49b12c6SRichard Henderson 
924f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
925f49b12c6SRichard Henderson         unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
926f49b12c6SRichard Henderson         if (r > UINT16_MAX) {
927f49b12c6SRichard Henderson             r = UINT16_MAX;
928f49b12c6SRichard Henderson         }
929f49b12c6SRichard Henderson         *(uint16_t *)(d + i) = r;
930f49b12c6SRichard Henderson     }
931f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
932f49b12c6SRichard Henderson }
933f49b12c6SRichard Henderson 
934f49b12c6SRichard Henderson void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
935f49b12c6SRichard Henderson {
936f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
937f49b12c6SRichard Henderson     intptr_t i;
938f49b12c6SRichard Henderson 
939f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
940f49b12c6SRichard Henderson         uint32_t ai = *(uint32_t *)(a + i);
941f49b12c6SRichard Henderson         uint32_t bi = *(uint32_t *)(b + i);
942f49b12c6SRichard Henderson         uint32_t di = ai + bi;
943f49b12c6SRichard Henderson         if (di < ai) {
944f49b12c6SRichard Henderson             di = UINT32_MAX;
945f49b12c6SRichard Henderson         }
946f49b12c6SRichard Henderson         *(uint32_t *)(d + i) = di;
947f49b12c6SRichard Henderson     }
948f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
949f49b12c6SRichard Henderson }
950f49b12c6SRichard Henderson 
951f49b12c6SRichard Henderson void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
952f49b12c6SRichard Henderson {
953f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
954f49b12c6SRichard Henderson     intptr_t i;
955f49b12c6SRichard Henderson 
956f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
957f49b12c6SRichard Henderson         uint64_t ai = *(uint64_t *)(a + i);
958f49b12c6SRichard Henderson         uint64_t bi = *(uint64_t *)(b + i);
959f49b12c6SRichard Henderson         uint64_t di = ai + bi;
960f49b12c6SRichard Henderson         if (di < ai) {
961f49b12c6SRichard Henderson             di = UINT64_MAX;
962f49b12c6SRichard Henderson         }
963f49b12c6SRichard Henderson         *(uint64_t *)(d + i) = di;
964f49b12c6SRichard Henderson     }
965f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
966f49b12c6SRichard Henderson }
967f49b12c6SRichard Henderson 
968f49b12c6SRichard Henderson void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
969f49b12c6SRichard Henderson {
970f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
971f49b12c6SRichard Henderson     intptr_t i;
972f49b12c6SRichard Henderson 
973f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
974f49b12c6SRichard Henderson         int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
975f49b12c6SRichard Henderson         if (r < 0) {
976f49b12c6SRichard Henderson             r = 0;
977f49b12c6SRichard Henderson         }
978f49b12c6SRichard Henderson         *(uint8_t *)(d + i) = r;
979f49b12c6SRichard Henderson     }
980f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
981f49b12c6SRichard Henderson }
982f49b12c6SRichard Henderson 
983f49b12c6SRichard Henderson void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
984f49b12c6SRichard Henderson {
985f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
986f49b12c6SRichard Henderson     intptr_t i;
987f49b12c6SRichard Henderson 
988f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
989f49b12c6SRichard Henderson         int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
990f49b12c6SRichard Henderson         if (r < 0) {
991f49b12c6SRichard Henderson             r = 0;
992f49b12c6SRichard Henderson         }
993f49b12c6SRichard Henderson         *(uint16_t *)(d + i) = r;
994f49b12c6SRichard Henderson     }
995f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
996f49b12c6SRichard Henderson }
997f49b12c6SRichard Henderson 
998f49b12c6SRichard Henderson void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
999f49b12c6SRichard Henderson {
1000f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1001f49b12c6SRichard Henderson     intptr_t i;
1002f49b12c6SRichard Henderson 
1003f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1004f49b12c6SRichard Henderson         uint32_t ai = *(uint32_t *)(a + i);
1005f49b12c6SRichard Henderson         uint32_t bi = *(uint32_t *)(b + i);
1006f49b12c6SRichard Henderson         uint32_t di = ai - bi;
1007f49b12c6SRichard Henderson         if (ai < bi) {
1008f49b12c6SRichard Henderson             di = 0;
1009f49b12c6SRichard Henderson         }
1010f49b12c6SRichard Henderson         *(uint32_t *)(d + i) = di;
1011f49b12c6SRichard Henderson     }
1012f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1013f49b12c6SRichard Henderson }
1014f49b12c6SRichard Henderson 
1015f49b12c6SRichard Henderson void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1016f49b12c6SRichard Henderson {
1017f49b12c6SRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1018f49b12c6SRichard Henderson     intptr_t i;
1019f49b12c6SRichard Henderson 
1020f49b12c6SRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1021f49b12c6SRichard Henderson         uint64_t ai = *(uint64_t *)(a + i);
1022f49b12c6SRichard Henderson         uint64_t bi = *(uint64_t *)(b + i);
1023f49b12c6SRichard Henderson         uint64_t di = ai - bi;
1024f49b12c6SRichard Henderson         if (ai < bi) {
1025f49b12c6SRichard Henderson             di = 0;
1026f49b12c6SRichard Henderson         }
1027f49b12c6SRichard Henderson         *(uint64_t *)(d + i) = di;
1028f49b12c6SRichard Henderson     }
1029f49b12c6SRichard Henderson     clear_high(d, oprsz, desc);
1030f49b12c6SRichard Henderson }
1031dd0a0fcdSRichard Henderson 
1032dd0a0fcdSRichard Henderson void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1033dd0a0fcdSRichard Henderson {
1034dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1035dd0a0fcdSRichard Henderson     intptr_t i;
1036dd0a0fcdSRichard Henderson 
1037dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1038dd0a0fcdSRichard Henderson         int8_t aa = *(int8_t *)(a + i);
1039dd0a0fcdSRichard Henderson         int8_t bb = *(int8_t *)(b + i);
1040dd0a0fcdSRichard Henderson         int8_t dd = aa < bb ? aa : bb;
1041dd0a0fcdSRichard Henderson         *(int8_t *)(d + i) = dd;
1042dd0a0fcdSRichard Henderson     }
1043dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1044dd0a0fcdSRichard Henderson }
1045dd0a0fcdSRichard Henderson 
1046dd0a0fcdSRichard Henderson void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1047dd0a0fcdSRichard Henderson {
1048dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1049dd0a0fcdSRichard Henderson     intptr_t i;
1050dd0a0fcdSRichard Henderson 
1051dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1052dd0a0fcdSRichard Henderson         int16_t aa = *(int16_t *)(a + i);
1053dd0a0fcdSRichard Henderson         int16_t bb = *(int16_t *)(b + i);
1054dd0a0fcdSRichard Henderson         int16_t dd = aa < bb ? aa : bb;
1055dd0a0fcdSRichard Henderson         *(int16_t *)(d + i) = dd;
1056dd0a0fcdSRichard Henderson     }
1057dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1058dd0a0fcdSRichard Henderson }
1059dd0a0fcdSRichard Henderson 
1060dd0a0fcdSRichard Henderson void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1061dd0a0fcdSRichard Henderson {
1062dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1063dd0a0fcdSRichard Henderson     intptr_t i;
1064dd0a0fcdSRichard Henderson 
1065dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1066dd0a0fcdSRichard Henderson         int32_t aa = *(int32_t *)(a + i);
1067dd0a0fcdSRichard Henderson         int32_t bb = *(int32_t *)(b + i);
1068dd0a0fcdSRichard Henderson         int32_t dd = aa < bb ? aa : bb;
1069dd0a0fcdSRichard Henderson         *(int32_t *)(d + i) = dd;
1070dd0a0fcdSRichard Henderson     }
1071dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1072dd0a0fcdSRichard Henderson }
1073dd0a0fcdSRichard Henderson 
1074dd0a0fcdSRichard Henderson void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1075dd0a0fcdSRichard Henderson {
1076dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1077dd0a0fcdSRichard Henderson     intptr_t i;
1078dd0a0fcdSRichard Henderson 
1079dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1080dd0a0fcdSRichard Henderson         int64_t aa = *(int64_t *)(a + i);
1081dd0a0fcdSRichard Henderson         int64_t bb = *(int64_t *)(b + i);
1082dd0a0fcdSRichard Henderson         int64_t dd = aa < bb ? aa : bb;
1083dd0a0fcdSRichard Henderson         *(int64_t *)(d + i) = dd;
1084dd0a0fcdSRichard Henderson     }
1085dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1086dd0a0fcdSRichard Henderson }
1087dd0a0fcdSRichard Henderson 
1088dd0a0fcdSRichard Henderson void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1089dd0a0fcdSRichard Henderson {
1090dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1091dd0a0fcdSRichard Henderson     intptr_t i;
1092dd0a0fcdSRichard Henderson 
1093dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1094dd0a0fcdSRichard Henderson         int8_t aa = *(int8_t *)(a + i);
1095dd0a0fcdSRichard Henderson         int8_t bb = *(int8_t *)(b + i);
1096dd0a0fcdSRichard Henderson         int8_t dd = aa > bb ? aa : bb;
1097dd0a0fcdSRichard Henderson         *(int8_t *)(d + i) = dd;
1098dd0a0fcdSRichard Henderson     }
1099dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1100dd0a0fcdSRichard Henderson }
1101dd0a0fcdSRichard Henderson 
1102dd0a0fcdSRichard Henderson void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1103dd0a0fcdSRichard Henderson {
1104dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1105dd0a0fcdSRichard Henderson     intptr_t i;
1106dd0a0fcdSRichard Henderson 
1107dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1108dd0a0fcdSRichard Henderson         int16_t aa = *(int16_t *)(a + i);
1109dd0a0fcdSRichard Henderson         int16_t bb = *(int16_t *)(b + i);
1110dd0a0fcdSRichard Henderson         int16_t dd = aa > bb ? aa : bb;
1111dd0a0fcdSRichard Henderson         *(int16_t *)(d + i) = dd;
1112dd0a0fcdSRichard Henderson     }
1113dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1114dd0a0fcdSRichard Henderson }
1115dd0a0fcdSRichard Henderson 
1116dd0a0fcdSRichard Henderson void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1117dd0a0fcdSRichard Henderson {
1118dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1119dd0a0fcdSRichard Henderson     intptr_t i;
1120dd0a0fcdSRichard Henderson 
1121dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1122dd0a0fcdSRichard Henderson         int32_t aa = *(int32_t *)(a + i);
1123dd0a0fcdSRichard Henderson         int32_t bb = *(int32_t *)(b + i);
1124dd0a0fcdSRichard Henderson         int32_t dd = aa > bb ? aa : bb;
1125dd0a0fcdSRichard Henderson         *(int32_t *)(d + i) = dd;
1126dd0a0fcdSRichard Henderson     }
1127dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1128dd0a0fcdSRichard Henderson }
1129dd0a0fcdSRichard Henderson 
1130dd0a0fcdSRichard Henderson void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1131dd0a0fcdSRichard Henderson {
1132dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1133dd0a0fcdSRichard Henderson     intptr_t i;
1134dd0a0fcdSRichard Henderson 
1135dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1136dd0a0fcdSRichard Henderson         int64_t aa = *(int64_t *)(a + i);
1137dd0a0fcdSRichard Henderson         int64_t bb = *(int64_t *)(b + i);
1138dd0a0fcdSRichard Henderson         int64_t dd = aa > bb ? aa : bb;
1139dd0a0fcdSRichard Henderson         *(int64_t *)(d + i) = dd;
1140dd0a0fcdSRichard Henderson     }
1141dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1142dd0a0fcdSRichard Henderson }
1143dd0a0fcdSRichard Henderson 
1144dd0a0fcdSRichard Henderson void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1145dd0a0fcdSRichard Henderson {
1146dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1147dd0a0fcdSRichard Henderson     intptr_t i;
1148dd0a0fcdSRichard Henderson 
1149dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1150dd0a0fcdSRichard Henderson         uint8_t aa = *(uint8_t *)(a + i);
1151dd0a0fcdSRichard Henderson         uint8_t bb = *(uint8_t *)(b + i);
1152dd0a0fcdSRichard Henderson         uint8_t dd = aa < bb ? aa : bb;
1153dd0a0fcdSRichard Henderson         *(uint8_t *)(d + i) = dd;
1154dd0a0fcdSRichard Henderson     }
1155dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1156dd0a0fcdSRichard Henderson }
1157dd0a0fcdSRichard Henderson 
1158dd0a0fcdSRichard Henderson void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1159dd0a0fcdSRichard Henderson {
1160dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1161dd0a0fcdSRichard Henderson     intptr_t i;
1162dd0a0fcdSRichard Henderson 
1163dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1164dd0a0fcdSRichard Henderson         uint16_t aa = *(uint16_t *)(a + i);
1165dd0a0fcdSRichard Henderson         uint16_t bb = *(uint16_t *)(b + i);
1166dd0a0fcdSRichard Henderson         uint16_t dd = aa < bb ? aa : bb;
1167dd0a0fcdSRichard Henderson         *(uint16_t *)(d + i) = dd;
1168dd0a0fcdSRichard Henderson     }
1169dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1170dd0a0fcdSRichard Henderson }
1171dd0a0fcdSRichard Henderson 
1172dd0a0fcdSRichard Henderson void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1173dd0a0fcdSRichard Henderson {
1174dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1175dd0a0fcdSRichard Henderson     intptr_t i;
1176dd0a0fcdSRichard Henderson 
1177dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1178dd0a0fcdSRichard Henderson         uint32_t aa = *(uint32_t *)(a + i);
1179dd0a0fcdSRichard Henderson         uint32_t bb = *(uint32_t *)(b + i);
1180dd0a0fcdSRichard Henderson         uint32_t dd = aa < bb ? aa : bb;
1181dd0a0fcdSRichard Henderson         *(uint32_t *)(d + i) = dd;
1182dd0a0fcdSRichard Henderson     }
1183dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1184dd0a0fcdSRichard Henderson }
1185dd0a0fcdSRichard Henderson 
1186dd0a0fcdSRichard Henderson void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1187dd0a0fcdSRichard Henderson {
1188dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1189dd0a0fcdSRichard Henderson     intptr_t i;
1190dd0a0fcdSRichard Henderson 
1191dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1192dd0a0fcdSRichard Henderson         uint64_t aa = *(uint64_t *)(a + i);
1193dd0a0fcdSRichard Henderson         uint64_t bb = *(uint64_t *)(b + i);
1194dd0a0fcdSRichard Henderson         uint64_t dd = aa < bb ? aa : bb;
1195dd0a0fcdSRichard Henderson         *(uint64_t *)(d + i) = dd;
1196dd0a0fcdSRichard Henderson     }
1197dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1198dd0a0fcdSRichard Henderson }
1199dd0a0fcdSRichard Henderson 
1200dd0a0fcdSRichard Henderson void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1201dd0a0fcdSRichard Henderson {
1202dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1203dd0a0fcdSRichard Henderson     intptr_t i;
1204dd0a0fcdSRichard Henderson 
1205dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1206dd0a0fcdSRichard Henderson         uint8_t aa = *(uint8_t *)(a + i);
1207dd0a0fcdSRichard Henderson         uint8_t bb = *(uint8_t *)(b + i);
1208dd0a0fcdSRichard Henderson         uint8_t dd = aa > bb ? aa : bb;
1209dd0a0fcdSRichard Henderson         *(uint8_t *)(d + i) = dd;
1210dd0a0fcdSRichard Henderson     }
1211dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1212dd0a0fcdSRichard Henderson }
1213dd0a0fcdSRichard Henderson 
1214dd0a0fcdSRichard Henderson void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1215dd0a0fcdSRichard Henderson {
1216dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1217dd0a0fcdSRichard Henderson     intptr_t i;
1218dd0a0fcdSRichard Henderson 
1219dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1220dd0a0fcdSRichard Henderson         uint16_t aa = *(uint16_t *)(a + i);
1221dd0a0fcdSRichard Henderson         uint16_t bb = *(uint16_t *)(b + i);
1222dd0a0fcdSRichard Henderson         uint16_t dd = aa > bb ? aa : bb;
1223dd0a0fcdSRichard Henderson         *(uint16_t *)(d + i) = dd;
1224dd0a0fcdSRichard Henderson     }
1225dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1226dd0a0fcdSRichard Henderson }
1227dd0a0fcdSRichard Henderson 
1228dd0a0fcdSRichard Henderson void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1229dd0a0fcdSRichard Henderson {
1230dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1231dd0a0fcdSRichard Henderson     intptr_t i;
1232dd0a0fcdSRichard Henderson 
1233dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1234dd0a0fcdSRichard Henderson         uint32_t aa = *(uint32_t *)(a + i);
1235dd0a0fcdSRichard Henderson         uint32_t bb = *(uint32_t *)(b + i);
1236dd0a0fcdSRichard Henderson         uint32_t dd = aa > bb ? aa : bb;
1237dd0a0fcdSRichard Henderson         *(uint32_t *)(d + i) = dd;
1238dd0a0fcdSRichard Henderson     }
1239dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1240dd0a0fcdSRichard Henderson }
1241dd0a0fcdSRichard Henderson 
1242dd0a0fcdSRichard Henderson void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1243dd0a0fcdSRichard Henderson {
1244dd0a0fcdSRichard Henderson     intptr_t oprsz = simd_oprsz(desc);
1245dd0a0fcdSRichard Henderson     intptr_t i;
1246dd0a0fcdSRichard Henderson 
1247dd0a0fcdSRichard Henderson     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1248dd0a0fcdSRichard Henderson         uint64_t aa = *(uint64_t *)(a + i);
1249dd0a0fcdSRichard Henderson         uint64_t bb = *(uint64_t *)(b + i);
1250dd0a0fcdSRichard Henderson         uint64_t dd = aa > bb ? aa : bb;
1251dd0a0fcdSRichard Henderson         *(uint64_t *)(d + i) = dd;
1252dd0a0fcdSRichard Henderson     }
1253dd0a0fcdSRichard Henderson     clear_high(d, oprsz, desc);
1254dd0a0fcdSRichard Henderson }
1255