1db432672SRichard Henderson /* 2db432672SRichard Henderson * Generic vectorized operation runtime 3db432672SRichard Henderson * 4db432672SRichard Henderson * Copyright (c) 2018 Linaro 5db432672SRichard Henderson * 6db432672SRichard Henderson * This library is free software; you can redistribute it and/or 7db432672SRichard Henderson * modify it under the terms of the GNU Lesser General Public 8db432672SRichard Henderson * License as published by the Free Software Foundation; either 9*fb0343d5SThomas Huth * version 2.1 of the License, or (at your option) any later version. 10db432672SRichard Henderson * 11db432672SRichard Henderson * This library is distributed in the hope that it will be useful, 12db432672SRichard Henderson * but WITHOUT ANY WARRANTY; without even the implied warranty of 13db432672SRichard Henderson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14db432672SRichard Henderson * Lesser General Public License for more details. 15db432672SRichard Henderson * 16db432672SRichard Henderson * You should have received a copy of the GNU Lesser General Public 17db432672SRichard Henderson * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18db432672SRichard Henderson */ 19db432672SRichard Henderson 20db432672SRichard Henderson #include "qemu/osdep.h" 21db432672SRichard Henderson #include "qemu/host-utils.h" 22db432672SRichard Henderson #include "cpu.h" 23db432672SRichard Henderson #include "exec/helper-proto.h" 24db432672SRichard Henderson #include "tcg-gvec-desc.h" 25db432672SRichard Henderson 26db432672SRichard Henderson 27db432672SRichard Henderson /* Virtually all hosts support 16-byte vectors. Those that don't can emulate 28db432672SRichard Henderson * them via GCC's generic vector extension. This turns out to be simpler and 29db432672SRichard Henderson * more reliable than getting the compiler to autovectorize. 30db432672SRichard Henderson * 31db432672SRichard Henderson * In tcg-op-gvec.c, we asserted that both the size and alignment of the data 32db432672SRichard Henderson * are multiples of 16. 33db432672SRichard Henderson * 34db432672SRichard Henderson * When the compiler does not support all of the operations we require, the 35db432672SRichard Henderson * loops are written so that we can always fall back on the base types. 36db432672SRichard Henderson */ 37db432672SRichard Henderson #ifdef CONFIG_VECTOR16 38db432672SRichard Henderson typedef uint8_t vec8 __attribute__((vector_size(16))); 39db432672SRichard Henderson typedef uint16_t vec16 __attribute__((vector_size(16))); 40db432672SRichard Henderson typedef uint32_t vec32 __attribute__((vector_size(16))); 41db432672SRichard Henderson typedef uint64_t vec64 __attribute__((vector_size(16))); 42db432672SRichard Henderson 43db432672SRichard Henderson typedef int8_t svec8 __attribute__((vector_size(16))); 44db432672SRichard Henderson typedef int16_t svec16 __attribute__((vector_size(16))); 45db432672SRichard Henderson typedef int32_t svec32 __attribute__((vector_size(16))); 46db432672SRichard Henderson typedef int64_t svec64 __attribute__((vector_size(16))); 47db432672SRichard Henderson 48db432672SRichard Henderson #define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X } 49db432672SRichard Henderson #define DUP8(X) { X, X, X, X, X, X, X, X } 50db432672SRichard Henderson #define DUP4(X) { X, X, X, X } 51db432672SRichard Henderson #define DUP2(X) { X, X } 52db432672SRichard Henderson #else 53db432672SRichard Henderson typedef uint8_t vec8; 54db432672SRichard Henderson typedef uint16_t vec16; 55db432672SRichard Henderson typedef uint32_t vec32; 56db432672SRichard Henderson typedef uint64_t vec64; 57db432672SRichard Henderson 58db432672SRichard Henderson typedef int8_t svec8; 59db432672SRichard Henderson typedef int16_t svec16; 60db432672SRichard Henderson typedef int32_t svec32; 61db432672SRichard Henderson typedef int64_t svec64; 62db432672SRichard Henderson 63db432672SRichard Henderson #define DUP16(X) X 64db432672SRichard Henderson #define DUP8(X) X 65db432672SRichard Henderson #define DUP4(X) X 66db432672SRichard Henderson #define DUP2(X) X 67db432672SRichard Henderson #endif /* CONFIG_VECTOR16 */ 68db432672SRichard Henderson 69db432672SRichard Henderson static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc) 70db432672SRichard Henderson { 71db432672SRichard Henderson intptr_t maxsz = simd_maxsz(desc); 72db432672SRichard Henderson intptr_t i; 73db432672SRichard Henderson 74db432672SRichard Henderson if (unlikely(maxsz > oprsz)) { 75db432672SRichard Henderson for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) { 76db432672SRichard Henderson *(uint64_t *)(d + i) = 0; 77db432672SRichard Henderson } 78db432672SRichard Henderson } 79db432672SRichard Henderson } 80db432672SRichard Henderson 81db432672SRichard Henderson void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc) 82db432672SRichard Henderson { 83db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 84db432672SRichard Henderson intptr_t i; 85db432672SRichard Henderson 86db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 87db432672SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i); 88db432672SRichard Henderson } 89db432672SRichard Henderson clear_high(d, oprsz, desc); 90db432672SRichard Henderson } 91db432672SRichard Henderson 92db432672SRichard Henderson void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc) 93db432672SRichard Henderson { 94db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 95db432672SRichard Henderson intptr_t i; 96db432672SRichard Henderson 97db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 98db432672SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i); 99db432672SRichard Henderson } 100db432672SRichard Henderson clear_high(d, oprsz, desc); 101db432672SRichard Henderson } 102db432672SRichard Henderson 103db432672SRichard Henderson void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc) 104db432672SRichard Henderson { 105db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 106db432672SRichard Henderson intptr_t i; 107db432672SRichard Henderson 108db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 109db432672SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i); 110db432672SRichard Henderson } 111db432672SRichard Henderson clear_high(d, oprsz, desc); 112db432672SRichard Henderson } 113db432672SRichard Henderson 114db432672SRichard Henderson void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc) 115db432672SRichard Henderson { 116db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 117db432672SRichard Henderson intptr_t i; 118db432672SRichard Henderson 119db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 120db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i); 121db432672SRichard Henderson } 122db432672SRichard Henderson clear_high(d, oprsz, desc); 123db432672SRichard Henderson } 124db432672SRichard Henderson 12522fc3527SRichard Henderson void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc) 12622fc3527SRichard Henderson { 12722fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 12822fc3527SRichard Henderson vec8 vecb = (vec8)DUP16(b); 12922fc3527SRichard Henderson intptr_t i; 13022fc3527SRichard Henderson 13122fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 13222fc3527SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb; 13322fc3527SRichard Henderson } 13422fc3527SRichard Henderson clear_high(d, oprsz, desc); 13522fc3527SRichard Henderson } 13622fc3527SRichard Henderson 13722fc3527SRichard Henderson void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc) 13822fc3527SRichard Henderson { 13922fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 14022fc3527SRichard Henderson vec16 vecb = (vec16)DUP8(b); 14122fc3527SRichard Henderson intptr_t i; 14222fc3527SRichard Henderson 14322fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 14422fc3527SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb; 14522fc3527SRichard Henderson } 14622fc3527SRichard Henderson clear_high(d, oprsz, desc); 14722fc3527SRichard Henderson } 14822fc3527SRichard Henderson 14922fc3527SRichard Henderson void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc) 15022fc3527SRichard Henderson { 15122fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 15222fc3527SRichard Henderson vec32 vecb = (vec32)DUP4(b); 15322fc3527SRichard Henderson intptr_t i; 15422fc3527SRichard Henderson 15522fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 15622fc3527SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb; 15722fc3527SRichard Henderson } 15822fc3527SRichard Henderson clear_high(d, oprsz, desc); 15922fc3527SRichard Henderson } 16022fc3527SRichard Henderson 16122fc3527SRichard Henderson void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc) 16222fc3527SRichard Henderson { 16322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 16422fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 16522fc3527SRichard Henderson intptr_t i; 16622fc3527SRichard Henderson 16722fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 16822fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb; 16922fc3527SRichard Henderson } 17022fc3527SRichard Henderson clear_high(d, oprsz, desc); 17122fc3527SRichard Henderson } 17222fc3527SRichard Henderson 173db432672SRichard Henderson void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc) 174db432672SRichard Henderson { 175db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 176db432672SRichard Henderson intptr_t i; 177db432672SRichard Henderson 178db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 179db432672SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i); 180db432672SRichard Henderson } 181db432672SRichard Henderson clear_high(d, oprsz, desc); 182db432672SRichard Henderson } 183db432672SRichard Henderson 184db432672SRichard Henderson void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc) 185db432672SRichard Henderson { 186db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 187db432672SRichard Henderson intptr_t i; 188db432672SRichard Henderson 189db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 190db432672SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i); 191db432672SRichard Henderson } 192db432672SRichard Henderson clear_high(d, oprsz, desc); 193db432672SRichard Henderson } 194db432672SRichard Henderson 195db432672SRichard Henderson void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc) 196db432672SRichard Henderson { 197db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 198db432672SRichard Henderson intptr_t i; 199db432672SRichard Henderson 200db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 201db432672SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i); 202db432672SRichard Henderson } 203db432672SRichard Henderson clear_high(d, oprsz, desc); 204db432672SRichard Henderson } 205db432672SRichard Henderson 206db432672SRichard Henderson void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc) 207db432672SRichard Henderson { 208db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 209db432672SRichard Henderson intptr_t i; 210db432672SRichard Henderson 211db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 212db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i); 213db432672SRichard Henderson } 214db432672SRichard Henderson clear_high(d, oprsz, desc); 215db432672SRichard Henderson } 216db432672SRichard Henderson 21722fc3527SRichard Henderson void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc) 21822fc3527SRichard Henderson { 21922fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 22022fc3527SRichard Henderson vec8 vecb = (vec8)DUP16(b); 22122fc3527SRichard Henderson intptr_t i; 22222fc3527SRichard Henderson 22322fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 22422fc3527SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb; 22522fc3527SRichard Henderson } 22622fc3527SRichard Henderson clear_high(d, oprsz, desc); 22722fc3527SRichard Henderson } 22822fc3527SRichard Henderson 22922fc3527SRichard Henderson void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc) 23022fc3527SRichard Henderson { 23122fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 23222fc3527SRichard Henderson vec16 vecb = (vec16)DUP8(b); 23322fc3527SRichard Henderson intptr_t i; 23422fc3527SRichard Henderson 23522fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 23622fc3527SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb; 23722fc3527SRichard Henderson } 23822fc3527SRichard Henderson clear_high(d, oprsz, desc); 23922fc3527SRichard Henderson } 24022fc3527SRichard Henderson 24122fc3527SRichard Henderson void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc) 24222fc3527SRichard Henderson { 24322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 24422fc3527SRichard Henderson vec32 vecb = (vec32)DUP4(b); 24522fc3527SRichard Henderson intptr_t i; 24622fc3527SRichard Henderson 24722fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 24822fc3527SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb; 24922fc3527SRichard Henderson } 25022fc3527SRichard Henderson clear_high(d, oprsz, desc); 25122fc3527SRichard Henderson } 25222fc3527SRichard Henderson 25322fc3527SRichard Henderson void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc) 25422fc3527SRichard Henderson { 25522fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 25622fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 25722fc3527SRichard Henderson intptr_t i; 25822fc3527SRichard Henderson 25922fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 26022fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb; 26122fc3527SRichard Henderson } 26222fc3527SRichard Henderson clear_high(d, oprsz, desc); 26322fc3527SRichard Henderson } 26422fc3527SRichard Henderson 2653774030aSRichard Henderson void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc) 2663774030aSRichard Henderson { 2673774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2683774030aSRichard Henderson intptr_t i; 2693774030aSRichard Henderson 2703774030aSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 2713774030aSRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i); 2723774030aSRichard Henderson } 2733774030aSRichard Henderson clear_high(d, oprsz, desc); 2743774030aSRichard Henderson } 2753774030aSRichard Henderson 2763774030aSRichard Henderson void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc) 2773774030aSRichard Henderson { 2783774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2793774030aSRichard Henderson intptr_t i; 2803774030aSRichard Henderson 2813774030aSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 2823774030aSRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i); 2833774030aSRichard Henderson } 2843774030aSRichard Henderson clear_high(d, oprsz, desc); 2853774030aSRichard Henderson } 2863774030aSRichard Henderson 2873774030aSRichard Henderson void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc) 2883774030aSRichard Henderson { 2893774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2903774030aSRichard Henderson intptr_t i; 2913774030aSRichard Henderson 2923774030aSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 2933774030aSRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i); 2943774030aSRichard Henderson } 2953774030aSRichard Henderson clear_high(d, oprsz, desc); 2963774030aSRichard Henderson } 2973774030aSRichard Henderson 2983774030aSRichard Henderson void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc) 2993774030aSRichard Henderson { 3003774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 3013774030aSRichard Henderson intptr_t i; 3023774030aSRichard Henderson 3033774030aSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 3043774030aSRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i); 3053774030aSRichard Henderson } 3063774030aSRichard Henderson clear_high(d, oprsz, desc); 3073774030aSRichard Henderson } 3083774030aSRichard Henderson 30922fc3527SRichard Henderson void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc) 31022fc3527SRichard Henderson { 31122fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 31222fc3527SRichard Henderson vec8 vecb = (vec8)DUP16(b); 31322fc3527SRichard Henderson intptr_t i; 31422fc3527SRichard Henderson 31522fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 31622fc3527SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb; 31722fc3527SRichard Henderson } 31822fc3527SRichard Henderson clear_high(d, oprsz, desc); 31922fc3527SRichard Henderson } 32022fc3527SRichard Henderson 32122fc3527SRichard Henderson void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc) 32222fc3527SRichard Henderson { 32322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 32422fc3527SRichard Henderson vec16 vecb = (vec16)DUP8(b); 32522fc3527SRichard Henderson intptr_t i; 32622fc3527SRichard Henderson 32722fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 32822fc3527SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb; 32922fc3527SRichard Henderson } 33022fc3527SRichard Henderson clear_high(d, oprsz, desc); 33122fc3527SRichard Henderson } 33222fc3527SRichard Henderson 33322fc3527SRichard Henderson void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc) 33422fc3527SRichard Henderson { 33522fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 33622fc3527SRichard Henderson vec32 vecb = (vec32)DUP4(b); 33722fc3527SRichard Henderson intptr_t i; 33822fc3527SRichard Henderson 33922fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 34022fc3527SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb; 34122fc3527SRichard Henderson } 34222fc3527SRichard Henderson clear_high(d, oprsz, desc); 34322fc3527SRichard Henderson } 34422fc3527SRichard Henderson 34522fc3527SRichard Henderson void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc) 34622fc3527SRichard Henderson { 34722fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 34822fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 34922fc3527SRichard Henderson intptr_t i; 35022fc3527SRichard Henderson 35122fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 35222fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb; 35322fc3527SRichard Henderson } 35422fc3527SRichard Henderson clear_high(d, oprsz, desc); 35522fc3527SRichard Henderson } 35622fc3527SRichard Henderson 357db432672SRichard Henderson void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) 358db432672SRichard Henderson { 359db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 360db432672SRichard Henderson intptr_t i; 361db432672SRichard Henderson 362db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 363db432672SRichard Henderson *(vec8 *)(d + i) = -*(vec8 *)(a + i); 364db432672SRichard Henderson } 365db432672SRichard Henderson clear_high(d, oprsz, desc); 366db432672SRichard Henderson } 367db432672SRichard Henderson 368db432672SRichard Henderson void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) 369db432672SRichard Henderson { 370db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 371db432672SRichard Henderson intptr_t i; 372db432672SRichard Henderson 373db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 374db432672SRichard Henderson *(vec16 *)(d + i) = -*(vec16 *)(a + i); 375db432672SRichard Henderson } 376db432672SRichard Henderson clear_high(d, oprsz, desc); 377db432672SRichard Henderson } 378db432672SRichard Henderson 379db432672SRichard Henderson void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) 380db432672SRichard Henderson { 381db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 382db432672SRichard Henderson intptr_t i; 383db432672SRichard Henderson 384db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 385db432672SRichard Henderson *(vec32 *)(d + i) = -*(vec32 *)(a + i); 386db432672SRichard Henderson } 387db432672SRichard Henderson clear_high(d, oprsz, desc); 388db432672SRichard Henderson } 389db432672SRichard Henderson 390db432672SRichard Henderson void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) 391db432672SRichard Henderson { 392db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 393db432672SRichard Henderson intptr_t i; 394db432672SRichard Henderson 395db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 396db432672SRichard Henderson *(vec64 *)(d + i) = -*(vec64 *)(a + i); 397db432672SRichard Henderson } 398db432672SRichard Henderson clear_high(d, oprsz, desc); 399db432672SRichard Henderson } 400db432672SRichard Henderson 401db432672SRichard Henderson void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) 402db432672SRichard Henderson { 403db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 404db432672SRichard Henderson 405db432672SRichard Henderson memcpy(d, a, oprsz); 406db432672SRichard Henderson clear_high(d, oprsz, desc); 407db432672SRichard Henderson } 408db432672SRichard Henderson 409db432672SRichard Henderson void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) 410db432672SRichard Henderson { 411db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 412db432672SRichard Henderson intptr_t i; 413db432672SRichard Henderson 414db432672SRichard Henderson if (c == 0) { 415db432672SRichard Henderson oprsz = 0; 416db432672SRichard Henderson } else { 417db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 418db432672SRichard Henderson *(uint64_t *)(d + i) = c; 419db432672SRichard Henderson } 420db432672SRichard Henderson } 421db432672SRichard Henderson clear_high(d, oprsz, desc); 422db432672SRichard Henderson } 423db432672SRichard Henderson 424db432672SRichard Henderson void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) 425db432672SRichard Henderson { 426db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 427db432672SRichard Henderson intptr_t i; 428db432672SRichard Henderson 429db432672SRichard Henderson if (c == 0) { 430db432672SRichard Henderson oprsz = 0; 431db432672SRichard Henderson } else { 432db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 433db432672SRichard Henderson *(uint32_t *)(d + i) = c; 434db432672SRichard Henderson } 435db432672SRichard Henderson } 436db432672SRichard Henderson clear_high(d, oprsz, desc); 437db432672SRichard Henderson } 438db432672SRichard Henderson 439db432672SRichard Henderson void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) 440db432672SRichard Henderson { 441db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); 442db432672SRichard Henderson } 443db432672SRichard Henderson 444db432672SRichard Henderson void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) 445db432672SRichard Henderson { 446db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); 447db432672SRichard Henderson } 448db432672SRichard Henderson 449db432672SRichard Henderson void HELPER(gvec_not)(void *d, void *a, uint32_t desc) 450db432672SRichard Henderson { 451db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 452db432672SRichard Henderson intptr_t i; 453db432672SRichard Henderson 454db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 455db432672SRichard Henderson *(vec64 *)(d + i) = ~*(vec64 *)(a + i); 456db432672SRichard Henderson } 457db432672SRichard Henderson clear_high(d, oprsz, desc); 458db432672SRichard Henderson } 459db432672SRichard Henderson 460db432672SRichard Henderson void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) 461db432672SRichard Henderson { 462db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 463db432672SRichard Henderson intptr_t i; 464db432672SRichard Henderson 465db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 466db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i); 467db432672SRichard Henderson } 468db432672SRichard Henderson clear_high(d, oprsz, desc); 469db432672SRichard Henderson } 470db432672SRichard Henderson 471db432672SRichard Henderson void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) 472db432672SRichard Henderson { 473db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 474db432672SRichard Henderson intptr_t i; 475db432672SRichard Henderson 476db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 477db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i); 478db432672SRichard Henderson } 479db432672SRichard Henderson clear_high(d, oprsz, desc); 480db432672SRichard Henderson } 481db432672SRichard Henderson 482db432672SRichard Henderson void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) 483db432672SRichard Henderson { 484db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 485db432672SRichard Henderson intptr_t i; 486db432672SRichard Henderson 487db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 488db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i); 489db432672SRichard Henderson } 490db432672SRichard Henderson clear_high(d, oprsz, desc); 491db432672SRichard Henderson } 492db432672SRichard Henderson 493db432672SRichard Henderson void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) 494db432672SRichard Henderson { 495db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 496db432672SRichard Henderson intptr_t i; 497db432672SRichard Henderson 498db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 499db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i); 500db432672SRichard Henderson } 501db432672SRichard Henderson clear_high(d, oprsz, desc); 502db432672SRichard Henderson } 503db432672SRichard Henderson 504db432672SRichard Henderson void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) 505db432672SRichard Henderson { 506db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 507db432672SRichard Henderson intptr_t i; 508db432672SRichard Henderson 509db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 510db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i); 511db432672SRichard Henderson } 512db432672SRichard Henderson clear_high(d, oprsz, desc); 513db432672SRichard Henderson } 514d0ec9796SRichard Henderson 515f550805dSRichard Henderson void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc) 516f550805dSRichard Henderson { 517f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 518f550805dSRichard Henderson intptr_t i; 519f550805dSRichard Henderson 520f550805dSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 521f550805dSRichard Henderson *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) & *(vec64 *)(b + i)); 522f550805dSRichard Henderson } 523f550805dSRichard Henderson clear_high(d, oprsz, desc); 524f550805dSRichard Henderson } 525f550805dSRichard Henderson 526f550805dSRichard Henderson void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc) 527f550805dSRichard Henderson { 528f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 529f550805dSRichard Henderson intptr_t i; 530f550805dSRichard Henderson 531f550805dSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 532f550805dSRichard Henderson *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) | *(vec64 *)(b + i)); 533f550805dSRichard Henderson } 534f550805dSRichard Henderson clear_high(d, oprsz, desc); 535f550805dSRichard Henderson } 536f550805dSRichard Henderson 537f550805dSRichard Henderson void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc) 538f550805dSRichard Henderson { 539f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 540f550805dSRichard Henderson intptr_t i; 541f550805dSRichard Henderson 542f550805dSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 543f550805dSRichard Henderson *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) ^ *(vec64 *)(b + i)); 544f550805dSRichard Henderson } 545f550805dSRichard Henderson clear_high(d, oprsz, desc); 546f550805dSRichard Henderson } 547f550805dSRichard Henderson 54822fc3527SRichard Henderson void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc) 54922fc3527SRichard Henderson { 55022fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 55122fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 55222fc3527SRichard Henderson intptr_t i; 55322fc3527SRichard Henderson 55422fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 55522fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) & vecb; 55622fc3527SRichard Henderson } 55722fc3527SRichard Henderson clear_high(d, oprsz, desc); 55822fc3527SRichard Henderson } 55922fc3527SRichard Henderson 56022fc3527SRichard Henderson void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc) 56122fc3527SRichard Henderson { 56222fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 56322fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 56422fc3527SRichard Henderson intptr_t i; 56522fc3527SRichard Henderson 56622fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 56722fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb; 56822fc3527SRichard Henderson } 56922fc3527SRichard Henderson clear_high(d, oprsz, desc); 57022fc3527SRichard Henderson } 57122fc3527SRichard Henderson 57222fc3527SRichard Henderson void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc) 57322fc3527SRichard Henderson { 57422fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 57522fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 57622fc3527SRichard Henderson intptr_t i; 57722fc3527SRichard Henderson 57822fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 57922fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb; 58022fc3527SRichard Henderson } 58122fc3527SRichard Henderson clear_high(d, oprsz, desc); 58222fc3527SRichard Henderson } 58322fc3527SRichard Henderson 584d0ec9796SRichard Henderson void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) 585d0ec9796SRichard Henderson { 586d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 587d0ec9796SRichard Henderson int shift = simd_data(desc); 588d0ec9796SRichard Henderson intptr_t i; 589d0ec9796SRichard Henderson 590d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 591d0ec9796SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift; 592d0ec9796SRichard Henderson } 593d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 594d0ec9796SRichard Henderson } 595d0ec9796SRichard Henderson 596d0ec9796SRichard Henderson void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) 597d0ec9796SRichard Henderson { 598d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 599d0ec9796SRichard Henderson int shift = simd_data(desc); 600d0ec9796SRichard Henderson intptr_t i; 601d0ec9796SRichard Henderson 602d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 603d0ec9796SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift; 604d0ec9796SRichard Henderson } 605d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 606d0ec9796SRichard Henderson } 607d0ec9796SRichard Henderson 608d0ec9796SRichard Henderson void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) 609d0ec9796SRichard Henderson { 610d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 611d0ec9796SRichard Henderson int shift = simd_data(desc); 612d0ec9796SRichard Henderson intptr_t i; 613d0ec9796SRichard Henderson 614d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 615d0ec9796SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift; 616d0ec9796SRichard Henderson } 617d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 618d0ec9796SRichard Henderson } 619d0ec9796SRichard Henderson 620d0ec9796SRichard Henderson void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) 621d0ec9796SRichard Henderson { 622d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 623d0ec9796SRichard Henderson int shift = simd_data(desc); 624d0ec9796SRichard Henderson intptr_t i; 625d0ec9796SRichard Henderson 626d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 627d0ec9796SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift; 628d0ec9796SRichard Henderson } 629d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 630d0ec9796SRichard Henderson } 631d0ec9796SRichard Henderson 632d0ec9796SRichard Henderson void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc) 633d0ec9796SRichard Henderson { 634d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 635d0ec9796SRichard Henderson int shift = simd_data(desc); 636d0ec9796SRichard Henderson intptr_t i; 637d0ec9796SRichard Henderson 638d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 639d0ec9796SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift; 640d0ec9796SRichard Henderson } 641d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 642d0ec9796SRichard Henderson } 643d0ec9796SRichard Henderson 644d0ec9796SRichard Henderson void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) 645d0ec9796SRichard Henderson { 646d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 647d0ec9796SRichard Henderson int shift = simd_data(desc); 648d0ec9796SRichard Henderson intptr_t i; 649d0ec9796SRichard Henderson 650d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 651d0ec9796SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift; 652d0ec9796SRichard Henderson } 653d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 654d0ec9796SRichard Henderson } 655d0ec9796SRichard Henderson 656d0ec9796SRichard Henderson void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) 657d0ec9796SRichard Henderson { 658d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 659d0ec9796SRichard Henderson int shift = simd_data(desc); 660d0ec9796SRichard Henderson intptr_t i; 661d0ec9796SRichard Henderson 662d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 663d0ec9796SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift; 664d0ec9796SRichard Henderson } 665d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 666d0ec9796SRichard Henderson } 667d0ec9796SRichard Henderson 668d0ec9796SRichard Henderson void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) 669d0ec9796SRichard Henderson { 670d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 671d0ec9796SRichard Henderson int shift = simd_data(desc); 672d0ec9796SRichard Henderson intptr_t i; 673d0ec9796SRichard Henderson 674d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 675d0ec9796SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift; 676d0ec9796SRichard Henderson } 677d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 678d0ec9796SRichard Henderson } 679d0ec9796SRichard Henderson 680d0ec9796SRichard Henderson void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) 681d0ec9796SRichard Henderson { 682d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 683d0ec9796SRichard Henderson int shift = simd_data(desc); 684d0ec9796SRichard Henderson intptr_t i; 685d0ec9796SRichard Henderson 686d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 687d0ec9796SRichard Henderson *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift; 688d0ec9796SRichard Henderson } 689d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 690d0ec9796SRichard Henderson } 691d0ec9796SRichard Henderson 692d0ec9796SRichard Henderson void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) 693d0ec9796SRichard Henderson { 694d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 695d0ec9796SRichard Henderson int shift = simd_data(desc); 696d0ec9796SRichard Henderson intptr_t i; 697d0ec9796SRichard Henderson 698d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 699d0ec9796SRichard Henderson *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift; 700d0ec9796SRichard Henderson } 701d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 702d0ec9796SRichard Henderson } 703d0ec9796SRichard Henderson 704d0ec9796SRichard Henderson void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) 705d0ec9796SRichard Henderson { 706d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 707d0ec9796SRichard Henderson int shift = simd_data(desc); 708d0ec9796SRichard Henderson intptr_t i; 709d0ec9796SRichard Henderson 710d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 711d0ec9796SRichard Henderson *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift; 712d0ec9796SRichard Henderson } 713d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 714d0ec9796SRichard Henderson } 715d0ec9796SRichard Henderson 716d0ec9796SRichard Henderson void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) 717d0ec9796SRichard Henderson { 718d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 719d0ec9796SRichard Henderson int shift = simd_data(desc); 720d0ec9796SRichard Henderson intptr_t i; 721d0ec9796SRichard Henderson 722d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 723d0ec9796SRichard Henderson *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift; 724d0ec9796SRichard Henderson } 725d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 726d0ec9796SRichard Henderson } 727212be173SRichard Henderson 728212be173SRichard Henderson /* If vectors are enabled, the compiler fills in -1 for true. 729212be173SRichard Henderson Otherwise, we must take care of this by hand. */ 730212be173SRichard Henderson #ifdef CONFIG_VECTOR16 731212be173SRichard Henderson # define DO_CMP0(X) X 732212be173SRichard Henderson #else 733212be173SRichard Henderson # define DO_CMP0(X) -(X) 734212be173SRichard Henderson #endif 735212be173SRichard Henderson 736212be173SRichard Henderson #define DO_CMP1(NAME, TYPE, OP) \ 737212be173SRichard Henderson void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ 738212be173SRichard Henderson { \ 739212be173SRichard Henderson intptr_t oprsz = simd_oprsz(desc); \ 740212be173SRichard Henderson intptr_t i; \ 7416cb1d3b8SRichard Henderson for (i = 0; i < oprsz; i += sizeof(TYPE)) { \ 742212be173SRichard Henderson *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \ 743212be173SRichard Henderson } \ 744212be173SRichard Henderson clear_high(d, oprsz, desc); \ 745212be173SRichard Henderson } 746212be173SRichard Henderson 747212be173SRichard Henderson #define DO_CMP2(SZ) \ 748212be173SRichard Henderson DO_CMP1(gvec_eq##SZ, vec##SZ, ==) \ 749212be173SRichard Henderson DO_CMP1(gvec_ne##SZ, vec##SZ, !=) \ 750212be173SRichard Henderson DO_CMP1(gvec_lt##SZ, svec##SZ, <) \ 751212be173SRichard Henderson DO_CMP1(gvec_le##SZ, svec##SZ, <=) \ 752212be173SRichard Henderson DO_CMP1(gvec_ltu##SZ, vec##SZ, <) \ 753212be173SRichard Henderson DO_CMP1(gvec_leu##SZ, vec##SZ, <=) 754212be173SRichard Henderson 755212be173SRichard Henderson DO_CMP2(8) 756212be173SRichard Henderson DO_CMP2(16) 757212be173SRichard Henderson DO_CMP2(32) 758212be173SRichard Henderson DO_CMP2(64) 759212be173SRichard Henderson 760212be173SRichard Henderson #undef DO_CMP0 761212be173SRichard Henderson #undef DO_CMP1 762212be173SRichard Henderson #undef DO_CMP2 763f49b12c6SRichard Henderson 764f49b12c6SRichard Henderson void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc) 765f49b12c6SRichard Henderson { 766f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 767f49b12c6SRichard Henderson intptr_t i; 768f49b12c6SRichard Henderson 769f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 770f49b12c6SRichard Henderson int r = *(int8_t *)(a + i) + *(int8_t *)(b + i); 771f49b12c6SRichard Henderson if (r > INT8_MAX) { 772f49b12c6SRichard Henderson r = INT8_MAX; 773f49b12c6SRichard Henderson } else if (r < INT8_MIN) { 774f49b12c6SRichard Henderson r = INT8_MIN; 775f49b12c6SRichard Henderson } 776f49b12c6SRichard Henderson *(int8_t *)(d + i) = r; 777f49b12c6SRichard Henderson } 778f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 779f49b12c6SRichard Henderson } 780f49b12c6SRichard Henderson 781f49b12c6SRichard Henderson void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc) 782f49b12c6SRichard Henderson { 783f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 784f49b12c6SRichard Henderson intptr_t i; 785f49b12c6SRichard Henderson 786f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 787f49b12c6SRichard Henderson int r = *(int16_t *)(a + i) + *(int16_t *)(b + i); 788f49b12c6SRichard Henderson if (r > INT16_MAX) { 789f49b12c6SRichard Henderson r = INT16_MAX; 790f49b12c6SRichard Henderson } else if (r < INT16_MIN) { 791f49b12c6SRichard Henderson r = INT16_MIN; 792f49b12c6SRichard Henderson } 793f49b12c6SRichard Henderson *(int16_t *)(d + i) = r; 794f49b12c6SRichard Henderson } 795f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 796f49b12c6SRichard Henderson } 797f49b12c6SRichard Henderson 798f49b12c6SRichard Henderson void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc) 799f49b12c6SRichard Henderson { 800f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 801f49b12c6SRichard Henderson intptr_t i; 802f49b12c6SRichard Henderson 803f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 804f49b12c6SRichard Henderson int32_t ai = *(int32_t *)(a + i); 805f49b12c6SRichard Henderson int32_t bi = *(int32_t *)(b + i); 806f49b12c6SRichard Henderson int32_t di = ai + bi; 807f49b12c6SRichard Henderson if (((di ^ ai) &~ (ai ^ bi)) < 0) { 808f49b12c6SRichard Henderson /* Signed overflow. */ 809f49b12c6SRichard Henderson di = (di < 0 ? INT32_MAX : INT32_MIN); 810f49b12c6SRichard Henderson } 811f49b12c6SRichard Henderson *(int32_t *)(d + i) = di; 812f49b12c6SRichard Henderson } 813f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 814f49b12c6SRichard Henderson } 815f49b12c6SRichard Henderson 816f49b12c6SRichard Henderson void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc) 817f49b12c6SRichard Henderson { 818f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 819f49b12c6SRichard Henderson intptr_t i; 820f49b12c6SRichard Henderson 821f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 822f49b12c6SRichard Henderson int64_t ai = *(int64_t *)(a + i); 823f49b12c6SRichard Henderson int64_t bi = *(int64_t *)(b + i); 824f49b12c6SRichard Henderson int64_t di = ai + bi; 825f49b12c6SRichard Henderson if (((di ^ ai) &~ (ai ^ bi)) < 0) { 826f49b12c6SRichard Henderson /* Signed overflow. */ 827f49b12c6SRichard Henderson di = (di < 0 ? INT64_MAX : INT64_MIN); 828f49b12c6SRichard Henderson } 829f49b12c6SRichard Henderson *(int64_t *)(d + i) = di; 830f49b12c6SRichard Henderson } 831f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 832f49b12c6SRichard Henderson } 833f49b12c6SRichard Henderson 834f49b12c6SRichard Henderson void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc) 835f49b12c6SRichard Henderson { 836f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 837f49b12c6SRichard Henderson intptr_t i; 838f49b12c6SRichard Henderson 839f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 840f49b12c6SRichard Henderson int r = *(int8_t *)(a + i) - *(int8_t *)(b + i); 841f49b12c6SRichard Henderson if (r > INT8_MAX) { 842f49b12c6SRichard Henderson r = INT8_MAX; 843f49b12c6SRichard Henderson } else if (r < INT8_MIN) { 844f49b12c6SRichard Henderson r = INT8_MIN; 845f49b12c6SRichard Henderson } 846f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 847f49b12c6SRichard Henderson } 848f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 849f49b12c6SRichard Henderson } 850f49b12c6SRichard Henderson 851f49b12c6SRichard Henderson void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc) 852f49b12c6SRichard Henderson { 853f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 854f49b12c6SRichard Henderson intptr_t i; 855f49b12c6SRichard Henderson 856f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 857f49b12c6SRichard Henderson int r = *(int16_t *)(a + i) - *(int16_t *)(b + i); 858f49b12c6SRichard Henderson if (r > INT16_MAX) { 859f49b12c6SRichard Henderson r = INT16_MAX; 860f49b12c6SRichard Henderson } else if (r < INT16_MIN) { 861f49b12c6SRichard Henderson r = INT16_MIN; 862f49b12c6SRichard Henderson } 863f49b12c6SRichard Henderson *(int16_t *)(d + i) = r; 864f49b12c6SRichard Henderson } 865f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 866f49b12c6SRichard Henderson } 867f49b12c6SRichard Henderson 868f49b12c6SRichard Henderson void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc) 869f49b12c6SRichard Henderson { 870f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 871f49b12c6SRichard Henderson intptr_t i; 872f49b12c6SRichard Henderson 873f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 874f49b12c6SRichard Henderson int32_t ai = *(int32_t *)(a + i); 875f49b12c6SRichard Henderson int32_t bi = *(int32_t *)(b + i); 876f49b12c6SRichard Henderson int32_t di = ai - bi; 877f49b12c6SRichard Henderson if (((di ^ ai) & (ai ^ bi)) < 0) { 878f49b12c6SRichard Henderson /* Signed overflow. */ 879f49b12c6SRichard Henderson di = (di < 0 ? INT32_MAX : INT32_MIN); 880f49b12c6SRichard Henderson } 881f49b12c6SRichard Henderson *(int32_t *)(d + i) = di; 882f49b12c6SRichard Henderson } 883f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 884f49b12c6SRichard Henderson } 885f49b12c6SRichard Henderson 886f49b12c6SRichard Henderson void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc) 887f49b12c6SRichard Henderson { 888f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 889f49b12c6SRichard Henderson intptr_t i; 890f49b12c6SRichard Henderson 891f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 892f49b12c6SRichard Henderson int64_t ai = *(int64_t *)(a + i); 893f49b12c6SRichard Henderson int64_t bi = *(int64_t *)(b + i); 894f49b12c6SRichard Henderson int64_t di = ai - bi; 895f49b12c6SRichard Henderson if (((di ^ ai) & (ai ^ bi)) < 0) { 896f49b12c6SRichard Henderson /* Signed overflow. */ 897f49b12c6SRichard Henderson di = (di < 0 ? INT64_MAX : INT64_MIN); 898f49b12c6SRichard Henderson } 899f49b12c6SRichard Henderson *(int64_t *)(d + i) = di; 900f49b12c6SRichard Henderson } 901f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 902f49b12c6SRichard Henderson } 903f49b12c6SRichard Henderson 904f49b12c6SRichard Henderson void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc) 905f49b12c6SRichard Henderson { 906f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 907f49b12c6SRichard Henderson intptr_t i; 908f49b12c6SRichard Henderson 909f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 910f49b12c6SRichard Henderson unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 911f49b12c6SRichard Henderson if (r > UINT8_MAX) { 912f49b12c6SRichard Henderson r = UINT8_MAX; 913f49b12c6SRichard Henderson } 914f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 915f49b12c6SRichard Henderson } 916f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 917f49b12c6SRichard Henderson } 918f49b12c6SRichard Henderson 919f49b12c6SRichard Henderson void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc) 920f49b12c6SRichard Henderson { 921f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 922f49b12c6SRichard Henderson intptr_t i; 923f49b12c6SRichard Henderson 924f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 925f49b12c6SRichard Henderson unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 926f49b12c6SRichard Henderson if (r > UINT16_MAX) { 927f49b12c6SRichard Henderson r = UINT16_MAX; 928f49b12c6SRichard Henderson } 929f49b12c6SRichard Henderson *(uint16_t *)(d + i) = r; 930f49b12c6SRichard Henderson } 931f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 932f49b12c6SRichard Henderson } 933f49b12c6SRichard Henderson 934f49b12c6SRichard Henderson void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc) 935f49b12c6SRichard Henderson { 936f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 937f49b12c6SRichard Henderson intptr_t i; 938f49b12c6SRichard Henderson 939f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 940f49b12c6SRichard Henderson uint32_t ai = *(uint32_t *)(a + i); 941f49b12c6SRichard Henderson uint32_t bi = *(uint32_t *)(b + i); 942f49b12c6SRichard Henderson uint32_t di = ai + bi; 943f49b12c6SRichard Henderson if (di < ai) { 944f49b12c6SRichard Henderson di = UINT32_MAX; 945f49b12c6SRichard Henderson } 946f49b12c6SRichard Henderson *(uint32_t *)(d + i) = di; 947f49b12c6SRichard Henderson } 948f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 949f49b12c6SRichard Henderson } 950f49b12c6SRichard Henderson 951f49b12c6SRichard Henderson void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc) 952f49b12c6SRichard Henderson { 953f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 954f49b12c6SRichard Henderson intptr_t i; 955f49b12c6SRichard Henderson 956f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 957f49b12c6SRichard Henderson uint64_t ai = *(uint64_t *)(a + i); 958f49b12c6SRichard Henderson uint64_t bi = *(uint64_t *)(b + i); 959f49b12c6SRichard Henderson uint64_t di = ai + bi; 960f49b12c6SRichard Henderson if (di < ai) { 961f49b12c6SRichard Henderson di = UINT64_MAX; 962f49b12c6SRichard Henderson } 963f49b12c6SRichard Henderson *(uint64_t *)(d + i) = di; 964f49b12c6SRichard Henderson } 965f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 966f49b12c6SRichard Henderson } 967f49b12c6SRichard Henderson 968f49b12c6SRichard Henderson void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc) 969f49b12c6SRichard Henderson { 970f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 971f49b12c6SRichard Henderson intptr_t i; 972f49b12c6SRichard Henderson 973f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 974f49b12c6SRichard Henderson int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 975f49b12c6SRichard Henderson if (r < 0) { 976f49b12c6SRichard Henderson r = 0; 977f49b12c6SRichard Henderson } 978f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 979f49b12c6SRichard Henderson } 980f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 981f49b12c6SRichard Henderson } 982f49b12c6SRichard Henderson 983f49b12c6SRichard Henderson void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc) 984f49b12c6SRichard Henderson { 985f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 986f49b12c6SRichard Henderson intptr_t i; 987f49b12c6SRichard Henderson 988f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 989f49b12c6SRichard Henderson int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 990f49b12c6SRichard Henderson if (r < 0) { 991f49b12c6SRichard Henderson r = 0; 992f49b12c6SRichard Henderson } 993f49b12c6SRichard Henderson *(uint16_t *)(d + i) = r; 994f49b12c6SRichard Henderson } 995f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 996f49b12c6SRichard Henderson } 997f49b12c6SRichard Henderson 998f49b12c6SRichard Henderson void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc) 999f49b12c6SRichard Henderson { 1000f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1001f49b12c6SRichard Henderson intptr_t i; 1002f49b12c6SRichard Henderson 1003f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1004f49b12c6SRichard Henderson uint32_t ai = *(uint32_t *)(a + i); 1005f49b12c6SRichard Henderson uint32_t bi = *(uint32_t *)(b + i); 1006f49b12c6SRichard Henderson uint32_t di = ai - bi; 1007f49b12c6SRichard Henderson if (ai < bi) { 1008f49b12c6SRichard Henderson di = 0; 1009f49b12c6SRichard Henderson } 1010f49b12c6SRichard Henderson *(uint32_t *)(d + i) = di; 1011f49b12c6SRichard Henderson } 1012f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1013f49b12c6SRichard Henderson } 1014f49b12c6SRichard Henderson 1015f49b12c6SRichard Henderson void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc) 1016f49b12c6SRichard Henderson { 1017f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1018f49b12c6SRichard Henderson intptr_t i; 1019f49b12c6SRichard Henderson 1020f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1021f49b12c6SRichard Henderson uint64_t ai = *(uint64_t *)(a + i); 1022f49b12c6SRichard Henderson uint64_t bi = *(uint64_t *)(b + i); 1023f49b12c6SRichard Henderson uint64_t di = ai - bi; 1024f49b12c6SRichard Henderson if (ai < bi) { 1025f49b12c6SRichard Henderson di = 0; 1026f49b12c6SRichard Henderson } 1027f49b12c6SRichard Henderson *(uint64_t *)(d + i) = di; 1028f49b12c6SRichard Henderson } 1029f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1030f49b12c6SRichard Henderson } 1031dd0a0fcdSRichard Henderson 1032dd0a0fcdSRichard Henderson void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc) 1033dd0a0fcdSRichard Henderson { 1034dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1035dd0a0fcdSRichard Henderson intptr_t i; 1036dd0a0fcdSRichard Henderson 1037dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1038dd0a0fcdSRichard Henderson int8_t aa = *(int8_t *)(a + i); 1039dd0a0fcdSRichard Henderson int8_t bb = *(int8_t *)(b + i); 1040dd0a0fcdSRichard Henderson int8_t dd = aa < bb ? aa : bb; 1041dd0a0fcdSRichard Henderson *(int8_t *)(d + i) = dd; 1042dd0a0fcdSRichard Henderson } 1043dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1044dd0a0fcdSRichard Henderson } 1045dd0a0fcdSRichard Henderson 1046dd0a0fcdSRichard Henderson void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc) 1047dd0a0fcdSRichard Henderson { 1048dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1049dd0a0fcdSRichard Henderson intptr_t i; 1050dd0a0fcdSRichard Henderson 1051dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1052dd0a0fcdSRichard Henderson int16_t aa = *(int16_t *)(a + i); 1053dd0a0fcdSRichard Henderson int16_t bb = *(int16_t *)(b + i); 1054dd0a0fcdSRichard Henderson int16_t dd = aa < bb ? aa : bb; 1055dd0a0fcdSRichard Henderson *(int16_t *)(d + i) = dd; 1056dd0a0fcdSRichard Henderson } 1057dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1058dd0a0fcdSRichard Henderson } 1059dd0a0fcdSRichard Henderson 1060dd0a0fcdSRichard Henderson void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc) 1061dd0a0fcdSRichard Henderson { 1062dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1063dd0a0fcdSRichard Henderson intptr_t i; 1064dd0a0fcdSRichard Henderson 1065dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1066dd0a0fcdSRichard Henderson int32_t aa = *(int32_t *)(a + i); 1067dd0a0fcdSRichard Henderson int32_t bb = *(int32_t *)(b + i); 1068dd0a0fcdSRichard Henderson int32_t dd = aa < bb ? aa : bb; 1069dd0a0fcdSRichard Henderson *(int32_t *)(d + i) = dd; 1070dd0a0fcdSRichard Henderson } 1071dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1072dd0a0fcdSRichard Henderson } 1073dd0a0fcdSRichard Henderson 1074dd0a0fcdSRichard Henderson void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc) 1075dd0a0fcdSRichard Henderson { 1076dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1077dd0a0fcdSRichard Henderson intptr_t i; 1078dd0a0fcdSRichard Henderson 1079dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1080dd0a0fcdSRichard Henderson int64_t aa = *(int64_t *)(a + i); 1081dd0a0fcdSRichard Henderson int64_t bb = *(int64_t *)(b + i); 1082dd0a0fcdSRichard Henderson int64_t dd = aa < bb ? aa : bb; 1083dd0a0fcdSRichard Henderson *(int64_t *)(d + i) = dd; 1084dd0a0fcdSRichard Henderson } 1085dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1086dd0a0fcdSRichard Henderson } 1087dd0a0fcdSRichard Henderson 1088dd0a0fcdSRichard Henderson void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc) 1089dd0a0fcdSRichard Henderson { 1090dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1091dd0a0fcdSRichard Henderson intptr_t i; 1092dd0a0fcdSRichard Henderson 1093dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1094dd0a0fcdSRichard Henderson int8_t aa = *(int8_t *)(a + i); 1095dd0a0fcdSRichard Henderson int8_t bb = *(int8_t *)(b + i); 1096dd0a0fcdSRichard Henderson int8_t dd = aa > bb ? aa : bb; 1097dd0a0fcdSRichard Henderson *(int8_t *)(d + i) = dd; 1098dd0a0fcdSRichard Henderson } 1099dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1100dd0a0fcdSRichard Henderson } 1101dd0a0fcdSRichard Henderson 1102dd0a0fcdSRichard Henderson void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc) 1103dd0a0fcdSRichard Henderson { 1104dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1105dd0a0fcdSRichard Henderson intptr_t i; 1106dd0a0fcdSRichard Henderson 1107dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1108dd0a0fcdSRichard Henderson int16_t aa = *(int16_t *)(a + i); 1109dd0a0fcdSRichard Henderson int16_t bb = *(int16_t *)(b + i); 1110dd0a0fcdSRichard Henderson int16_t dd = aa > bb ? aa : bb; 1111dd0a0fcdSRichard Henderson *(int16_t *)(d + i) = dd; 1112dd0a0fcdSRichard Henderson } 1113dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1114dd0a0fcdSRichard Henderson } 1115dd0a0fcdSRichard Henderson 1116dd0a0fcdSRichard Henderson void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc) 1117dd0a0fcdSRichard Henderson { 1118dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1119dd0a0fcdSRichard Henderson intptr_t i; 1120dd0a0fcdSRichard Henderson 1121dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1122dd0a0fcdSRichard Henderson int32_t aa = *(int32_t *)(a + i); 1123dd0a0fcdSRichard Henderson int32_t bb = *(int32_t *)(b + i); 1124dd0a0fcdSRichard Henderson int32_t dd = aa > bb ? aa : bb; 1125dd0a0fcdSRichard Henderson *(int32_t *)(d + i) = dd; 1126dd0a0fcdSRichard Henderson } 1127dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1128dd0a0fcdSRichard Henderson } 1129dd0a0fcdSRichard Henderson 1130dd0a0fcdSRichard Henderson void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc) 1131dd0a0fcdSRichard Henderson { 1132dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1133dd0a0fcdSRichard Henderson intptr_t i; 1134dd0a0fcdSRichard Henderson 1135dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1136dd0a0fcdSRichard Henderson int64_t aa = *(int64_t *)(a + i); 1137dd0a0fcdSRichard Henderson int64_t bb = *(int64_t *)(b + i); 1138dd0a0fcdSRichard Henderson int64_t dd = aa > bb ? aa : bb; 1139dd0a0fcdSRichard Henderson *(int64_t *)(d + i) = dd; 1140dd0a0fcdSRichard Henderson } 1141dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1142dd0a0fcdSRichard Henderson } 1143dd0a0fcdSRichard Henderson 1144dd0a0fcdSRichard Henderson void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc) 1145dd0a0fcdSRichard Henderson { 1146dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1147dd0a0fcdSRichard Henderson intptr_t i; 1148dd0a0fcdSRichard Henderson 1149dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1150dd0a0fcdSRichard Henderson uint8_t aa = *(uint8_t *)(a + i); 1151dd0a0fcdSRichard Henderson uint8_t bb = *(uint8_t *)(b + i); 1152dd0a0fcdSRichard Henderson uint8_t dd = aa < bb ? aa : bb; 1153dd0a0fcdSRichard Henderson *(uint8_t *)(d + i) = dd; 1154dd0a0fcdSRichard Henderson } 1155dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1156dd0a0fcdSRichard Henderson } 1157dd0a0fcdSRichard Henderson 1158dd0a0fcdSRichard Henderson void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc) 1159dd0a0fcdSRichard Henderson { 1160dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1161dd0a0fcdSRichard Henderson intptr_t i; 1162dd0a0fcdSRichard Henderson 1163dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1164dd0a0fcdSRichard Henderson uint16_t aa = *(uint16_t *)(a + i); 1165dd0a0fcdSRichard Henderson uint16_t bb = *(uint16_t *)(b + i); 1166dd0a0fcdSRichard Henderson uint16_t dd = aa < bb ? aa : bb; 1167dd0a0fcdSRichard Henderson *(uint16_t *)(d + i) = dd; 1168dd0a0fcdSRichard Henderson } 1169dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1170dd0a0fcdSRichard Henderson } 1171dd0a0fcdSRichard Henderson 1172dd0a0fcdSRichard Henderson void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc) 1173dd0a0fcdSRichard Henderson { 1174dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1175dd0a0fcdSRichard Henderson intptr_t i; 1176dd0a0fcdSRichard Henderson 1177dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1178dd0a0fcdSRichard Henderson uint32_t aa = *(uint32_t *)(a + i); 1179dd0a0fcdSRichard Henderson uint32_t bb = *(uint32_t *)(b + i); 1180dd0a0fcdSRichard Henderson uint32_t dd = aa < bb ? aa : bb; 1181dd0a0fcdSRichard Henderson *(uint32_t *)(d + i) = dd; 1182dd0a0fcdSRichard Henderson } 1183dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1184dd0a0fcdSRichard Henderson } 1185dd0a0fcdSRichard Henderson 1186dd0a0fcdSRichard Henderson void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc) 1187dd0a0fcdSRichard Henderson { 1188dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1189dd0a0fcdSRichard Henderson intptr_t i; 1190dd0a0fcdSRichard Henderson 1191dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1192dd0a0fcdSRichard Henderson uint64_t aa = *(uint64_t *)(a + i); 1193dd0a0fcdSRichard Henderson uint64_t bb = *(uint64_t *)(b + i); 1194dd0a0fcdSRichard Henderson uint64_t dd = aa < bb ? aa : bb; 1195dd0a0fcdSRichard Henderson *(uint64_t *)(d + i) = dd; 1196dd0a0fcdSRichard Henderson } 1197dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1198dd0a0fcdSRichard Henderson } 1199dd0a0fcdSRichard Henderson 1200dd0a0fcdSRichard Henderson void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc) 1201dd0a0fcdSRichard Henderson { 1202dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1203dd0a0fcdSRichard Henderson intptr_t i; 1204dd0a0fcdSRichard Henderson 1205dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1206dd0a0fcdSRichard Henderson uint8_t aa = *(uint8_t *)(a + i); 1207dd0a0fcdSRichard Henderson uint8_t bb = *(uint8_t *)(b + i); 1208dd0a0fcdSRichard Henderson uint8_t dd = aa > bb ? aa : bb; 1209dd0a0fcdSRichard Henderson *(uint8_t *)(d + i) = dd; 1210dd0a0fcdSRichard Henderson } 1211dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1212dd0a0fcdSRichard Henderson } 1213dd0a0fcdSRichard Henderson 1214dd0a0fcdSRichard Henderson void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc) 1215dd0a0fcdSRichard Henderson { 1216dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1217dd0a0fcdSRichard Henderson intptr_t i; 1218dd0a0fcdSRichard Henderson 1219dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1220dd0a0fcdSRichard Henderson uint16_t aa = *(uint16_t *)(a + i); 1221dd0a0fcdSRichard Henderson uint16_t bb = *(uint16_t *)(b + i); 1222dd0a0fcdSRichard Henderson uint16_t dd = aa > bb ? aa : bb; 1223dd0a0fcdSRichard Henderson *(uint16_t *)(d + i) = dd; 1224dd0a0fcdSRichard Henderson } 1225dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1226dd0a0fcdSRichard Henderson } 1227dd0a0fcdSRichard Henderson 1228dd0a0fcdSRichard Henderson void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc) 1229dd0a0fcdSRichard Henderson { 1230dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1231dd0a0fcdSRichard Henderson intptr_t i; 1232dd0a0fcdSRichard Henderson 1233dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1234dd0a0fcdSRichard Henderson uint32_t aa = *(uint32_t *)(a + i); 1235dd0a0fcdSRichard Henderson uint32_t bb = *(uint32_t *)(b + i); 1236dd0a0fcdSRichard Henderson uint32_t dd = aa > bb ? aa : bb; 1237dd0a0fcdSRichard Henderson *(uint32_t *)(d + i) = dd; 1238dd0a0fcdSRichard Henderson } 1239dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1240dd0a0fcdSRichard Henderson } 1241dd0a0fcdSRichard Henderson 1242dd0a0fcdSRichard Henderson void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc) 1243dd0a0fcdSRichard Henderson { 1244dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1245dd0a0fcdSRichard Henderson intptr_t i; 1246dd0a0fcdSRichard Henderson 1247dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1248dd0a0fcdSRichard Henderson uint64_t aa = *(uint64_t *)(a + i); 1249dd0a0fcdSRichard Henderson uint64_t bb = *(uint64_t *)(b + i); 1250dd0a0fcdSRichard Henderson uint64_t dd = aa > bb ? aa : bb; 1251dd0a0fcdSRichard Henderson *(uint64_t *)(d + i) = dd; 1252dd0a0fcdSRichard Henderson } 1253dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1254dd0a0fcdSRichard Henderson } 1255