1db432672SRichard Henderson /* 2db432672SRichard Henderson * Generic vectorized operation runtime 3db432672SRichard Henderson * 4db432672SRichard Henderson * Copyright (c) 2018 Linaro 5db432672SRichard Henderson * 6db432672SRichard Henderson * This library is free software; you can redistribute it and/or 7db432672SRichard Henderson * modify it under the terms of the GNU Lesser General Public 8db432672SRichard Henderson * License as published by the Free Software Foundation; either 9fb0343d5SThomas Huth * version 2.1 of the License, or (at your option) any later version. 10db432672SRichard Henderson * 11db432672SRichard Henderson * This library is distributed in the hope that it will be useful, 12db432672SRichard Henderson * but WITHOUT ANY WARRANTY; without even the implied warranty of 13db432672SRichard Henderson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14db432672SRichard Henderson * Lesser General Public License for more details. 15db432672SRichard Henderson * 16db432672SRichard Henderson * You should have received a copy of the GNU Lesser General Public 17db432672SRichard Henderson * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18db432672SRichard Henderson */ 19db432672SRichard Henderson 20db432672SRichard Henderson #include "qemu/osdep.h" 21db432672SRichard Henderson #include "qemu/host-utils.h" 22db432672SRichard Henderson #include "cpu.h" 23db432672SRichard Henderson #include "exec/helper-proto.h" 24db432672SRichard Henderson #include "tcg-gvec-desc.h" 25db432672SRichard Henderson 26db432672SRichard Henderson 27db432672SRichard Henderson /* Virtually all hosts support 16-byte vectors. Those that don't can emulate 28db432672SRichard Henderson * them via GCC's generic vector extension. This turns out to be simpler and 29db432672SRichard Henderson * more reliable than getting the compiler to autovectorize. 30db432672SRichard Henderson * 31db432672SRichard Henderson * In tcg-op-gvec.c, we asserted that both the size and alignment of the data 32db432672SRichard Henderson * are multiples of 16. 33db432672SRichard Henderson * 34db432672SRichard Henderson * When the compiler does not support all of the operations we require, the 35db432672SRichard Henderson * loops are written so that we can always fall back on the base types. 36db432672SRichard Henderson */ 37db432672SRichard Henderson #ifdef CONFIG_VECTOR16 38db432672SRichard Henderson typedef uint8_t vec8 __attribute__((vector_size(16))); 39db432672SRichard Henderson typedef uint16_t vec16 __attribute__((vector_size(16))); 40db432672SRichard Henderson typedef uint32_t vec32 __attribute__((vector_size(16))); 41db432672SRichard Henderson typedef uint64_t vec64 __attribute__((vector_size(16))); 42db432672SRichard Henderson 43db432672SRichard Henderson typedef int8_t svec8 __attribute__((vector_size(16))); 44db432672SRichard Henderson typedef int16_t svec16 __attribute__((vector_size(16))); 45db432672SRichard Henderson typedef int32_t svec32 __attribute__((vector_size(16))); 46db432672SRichard Henderson typedef int64_t svec64 __attribute__((vector_size(16))); 47db432672SRichard Henderson 48db432672SRichard Henderson #define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X } 49db432672SRichard Henderson #define DUP8(X) { X, X, X, X, X, X, X, X } 50db432672SRichard Henderson #define DUP4(X) { X, X, X, X } 51db432672SRichard Henderson #define DUP2(X) { X, X } 52db432672SRichard Henderson #else 53db432672SRichard Henderson typedef uint8_t vec8; 54db432672SRichard Henderson typedef uint16_t vec16; 55db432672SRichard Henderson typedef uint32_t vec32; 56db432672SRichard Henderson typedef uint64_t vec64; 57db432672SRichard Henderson 58db432672SRichard Henderson typedef int8_t svec8; 59db432672SRichard Henderson typedef int16_t svec16; 60db432672SRichard Henderson typedef int32_t svec32; 61db432672SRichard Henderson typedef int64_t svec64; 62db432672SRichard Henderson 63db432672SRichard Henderson #define DUP16(X) X 64db432672SRichard Henderson #define DUP8(X) X 65db432672SRichard Henderson #define DUP4(X) X 66db432672SRichard Henderson #define DUP2(X) X 67db432672SRichard Henderson #endif /* CONFIG_VECTOR16 */ 68db432672SRichard Henderson 69db432672SRichard Henderson static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc) 70db432672SRichard Henderson { 71db432672SRichard Henderson intptr_t maxsz = simd_maxsz(desc); 72db432672SRichard Henderson intptr_t i; 73db432672SRichard Henderson 74db432672SRichard Henderson if (unlikely(maxsz > oprsz)) { 75db432672SRichard Henderson for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) { 76db432672SRichard Henderson *(uint64_t *)(d + i) = 0; 77db432672SRichard Henderson } 78db432672SRichard Henderson } 79db432672SRichard Henderson } 80db432672SRichard Henderson 81db432672SRichard Henderson void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc) 82db432672SRichard Henderson { 83db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 84db432672SRichard Henderson intptr_t i; 85db432672SRichard Henderson 86db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 87db432672SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i); 88db432672SRichard Henderson } 89db432672SRichard Henderson clear_high(d, oprsz, desc); 90db432672SRichard Henderson } 91db432672SRichard Henderson 92db432672SRichard Henderson void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc) 93db432672SRichard Henderson { 94db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 95db432672SRichard Henderson intptr_t i; 96db432672SRichard Henderson 97db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 98db432672SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i); 99db432672SRichard Henderson } 100db432672SRichard Henderson clear_high(d, oprsz, desc); 101db432672SRichard Henderson } 102db432672SRichard Henderson 103db432672SRichard Henderson void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc) 104db432672SRichard Henderson { 105db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 106db432672SRichard Henderson intptr_t i; 107db432672SRichard Henderson 108db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 109db432672SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i); 110db432672SRichard Henderson } 111db432672SRichard Henderson clear_high(d, oprsz, desc); 112db432672SRichard Henderson } 113db432672SRichard Henderson 114db432672SRichard Henderson void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc) 115db432672SRichard Henderson { 116db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 117db432672SRichard Henderson intptr_t i; 118db432672SRichard Henderson 119db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 120db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i); 121db432672SRichard Henderson } 122db432672SRichard Henderson clear_high(d, oprsz, desc); 123db432672SRichard Henderson } 124db432672SRichard Henderson 12522fc3527SRichard Henderson void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc) 12622fc3527SRichard Henderson { 12722fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 12822fc3527SRichard Henderson vec8 vecb = (vec8)DUP16(b); 12922fc3527SRichard Henderson intptr_t i; 13022fc3527SRichard Henderson 13122fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 13222fc3527SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb; 13322fc3527SRichard Henderson } 13422fc3527SRichard Henderson clear_high(d, oprsz, desc); 13522fc3527SRichard Henderson } 13622fc3527SRichard Henderson 13722fc3527SRichard Henderson void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc) 13822fc3527SRichard Henderson { 13922fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 14022fc3527SRichard Henderson vec16 vecb = (vec16)DUP8(b); 14122fc3527SRichard Henderson intptr_t i; 14222fc3527SRichard Henderson 14322fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 14422fc3527SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb; 14522fc3527SRichard Henderson } 14622fc3527SRichard Henderson clear_high(d, oprsz, desc); 14722fc3527SRichard Henderson } 14822fc3527SRichard Henderson 14922fc3527SRichard Henderson void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc) 15022fc3527SRichard Henderson { 15122fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 15222fc3527SRichard Henderson vec32 vecb = (vec32)DUP4(b); 15322fc3527SRichard Henderson intptr_t i; 15422fc3527SRichard Henderson 15522fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 15622fc3527SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb; 15722fc3527SRichard Henderson } 15822fc3527SRichard Henderson clear_high(d, oprsz, desc); 15922fc3527SRichard Henderson } 16022fc3527SRichard Henderson 16122fc3527SRichard Henderson void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc) 16222fc3527SRichard Henderson { 16322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 16422fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 16522fc3527SRichard Henderson intptr_t i; 16622fc3527SRichard Henderson 16722fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 16822fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb; 16922fc3527SRichard Henderson } 17022fc3527SRichard Henderson clear_high(d, oprsz, desc); 17122fc3527SRichard Henderson } 17222fc3527SRichard Henderson 173db432672SRichard Henderson void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc) 174db432672SRichard Henderson { 175db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 176db432672SRichard Henderson intptr_t i; 177db432672SRichard Henderson 178db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 179db432672SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i); 180db432672SRichard Henderson } 181db432672SRichard Henderson clear_high(d, oprsz, desc); 182db432672SRichard Henderson } 183db432672SRichard Henderson 184db432672SRichard Henderson void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc) 185db432672SRichard Henderson { 186db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 187db432672SRichard Henderson intptr_t i; 188db432672SRichard Henderson 189db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 190db432672SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i); 191db432672SRichard Henderson } 192db432672SRichard Henderson clear_high(d, oprsz, desc); 193db432672SRichard Henderson } 194db432672SRichard Henderson 195db432672SRichard Henderson void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc) 196db432672SRichard Henderson { 197db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 198db432672SRichard Henderson intptr_t i; 199db432672SRichard Henderson 200db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 201db432672SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i); 202db432672SRichard Henderson } 203db432672SRichard Henderson clear_high(d, oprsz, desc); 204db432672SRichard Henderson } 205db432672SRichard Henderson 206db432672SRichard Henderson void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc) 207db432672SRichard Henderson { 208db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 209db432672SRichard Henderson intptr_t i; 210db432672SRichard Henderson 211db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 212db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i); 213db432672SRichard Henderson } 214db432672SRichard Henderson clear_high(d, oprsz, desc); 215db432672SRichard Henderson } 216db432672SRichard Henderson 21722fc3527SRichard Henderson void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc) 21822fc3527SRichard Henderson { 21922fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 22022fc3527SRichard Henderson vec8 vecb = (vec8)DUP16(b); 22122fc3527SRichard Henderson intptr_t i; 22222fc3527SRichard Henderson 22322fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 22422fc3527SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb; 22522fc3527SRichard Henderson } 22622fc3527SRichard Henderson clear_high(d, oprsz, desc); 22722fc3527SRichard Henderson } 22822fc3527SRichard Henderson 22922fc3527SRichard Henderson void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc) 23022fc3527SRichard Henderson { 23122fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 23222fc3527SRichard Henderson vec16 vecb = (vec16)DUP8(b); 23322fc3527SRichard Henderson intptr_t i; 23422fc3527SRichard Henderson 23522fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 23622fc3527SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb; 23722fc3527SRichard Henderson } 23822fc3527SRichard Henderson clear_high(d, oprsz, desc); 23922fc3527SRichard Henderson } 24022fc3527SRichard Henderson 24122fc3527SRichard Henderson void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc) 24222fc3527SRichard Henderson { 24322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 24422fc3527SRichard Henderson vec32 vecb = (vec32)DUP4(b); 24522fc3527SRichard Henderson intptr_t i; 24622fc3527SRichard Henderson 24722fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 24822fc3527SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb; 24922fc3527SRichard Henderson } 25022fc3527SRichard Henderson clear_high(d, oprsz, desc); 25122fc3527SRichard Henderson } 25222fc3527SRichard Henderson 25322fc3527SRichard Henderson void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc) 25422fc3527SRichard Henderson { 25522fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 25622fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 25722fc3527SRichard Henderson intptr_t i; 25822fc3527SRichard Henderson 25922fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 26022fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb; 26122fc3527SRichard Henderson } 26222fc3527SRichard Henderson clear_high(d, oprsz, desc); 26322fc3527SRichard Henderson } 26422fc3527SRichard Henderson 2653774030aSRichard Henderson void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc) 2663774030aSRichard Henderson { 2673774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2683774030aSRichard Henderson intptr_t i; 2693774030aSRichard Henderson 2703774030aSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 2713774030aSRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i); 2723774030aSRichard Henderson } 2733774030aSRichard Henderson clear_high(d, oprsz, desc); 2743774030aSRichard Henderson } 2753774030aSRichard Henderson 2763774030aSRichard Henderson void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc) 2773774030aSRichard Henderson { 2783774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2793774030aSRichard Henderson intptr_t i; 2803774030aSRichard Henderson 2813774030aSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 2823774030aSRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i); 2833774030aSRichard Henderson } 2843774030aSRichard Henderson clear_high(d, oprsz, desc); 2853774030aSRichard Henderson } 2863774030aSRichard Henderson 2873774030aSRichard Henderson void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc) 2883774030aSRichard Henderson { 2893774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2903774030aSRichard Henderson intptr_t i; 2913774030aSRichard Henderson 2923774030aSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 2933774030aSRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i); 2943774030aSRichard Henderson } 2953774030aSRichard Henderson clear_high(d, oprsz, desc); 2963774030aSRichard Henderson } 2973774030aSRichard Henderson 2983774030aSRichard Henderson void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc) 2993774030aSRichard Henderson { 3003774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 3013774030aSRichard Henderson intptr_t i; 3023774030aSRichard Henderson 3033774030aSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 3043774030aSRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i); 3053774030aSRichard Henderson } 3063774030aSRichard Henderson clear_high(d, oprsz, desc); 3073774030aSRichard Henderson } 3083774030aSRichard Henderson 30922fc3527SRichard Henderson void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc) 31022fc3527SRichard Henderson { 31122fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 31222fc3527SRichard Henderson vec8 vecb = (vec8)DUP16(b); 31322fc3527SRichard Henderson intptr_t i; 31422fc3527SRichard Henderson 31522fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 31622fc3527SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb; 31722fc3527SRichard Henderson } 31822fc3527SRichard Henderson clear_high(d, oprsz, desc); 31922fc3527SRichard Henderson } 32022fc3527SRichard Henderson 32122fc3527SRichard Henderson void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc) 32222fc3527SRichard Henderson { 32322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 32422fc3527SRichard Henderson vec16 vecb = (vec16)DUP8(b); 32522fc3527SRichard Henderson intptr_t i; 32622fc3527SRichard Henderson 32722fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 32822fc3527SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb; 32922fc3527SRichard Henderson } 33022fc3527SRichard Henderson clear_high(d, oprsz, desc); 33122fc3527SRichard Henderson } 33222fc3527SRichard Henderson 33322fc3527SRichard Henderson void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc) 33422fc3527SRichard Henderson { 33522fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 33622fc3527SRichard Henderson vec32 vecb = (vec32)DUP4(b); 33722fc3527SRichard Henderson intptr_t i; 33822fc3527SRichard Henderson 33922fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 34022fc3527SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb; 34122fc3527SRichard Henderson } 34222fc3527SRichard Henderson clear_high(d, oprsz, desc); 34322fc3527SRichard Henderson } 34422fc3527SRichard Henderson 34522fc3527SRichard Henderson void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc) 34622fc3527SRichard Henderson { 34722fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 34822fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 34922fc3527SRichard Henderson intptr_t i; 35022fc3527SRichard Henderson 35122fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 35222fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb; 35322fc3527SRichard Henderson } 35422fc3527SRichard Henderson clear_high(d, oprsz, desc); 35522fc3527SRichard Henderson } 35622fc3527SRichard Henderson 357db432672SRichard Henderson void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) 358db432672SRichard Henderson { 359db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 360db432672SRichard Henderson intptr_t i; 361db432672SRichard Henderson 362db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 363db432672SRichard Henderson *(vec8 *)(d + i) = -*(vec8 *)(a + i); 364db432672SRichard Henderson } 365db432672SRichard Henderson clear_high(d, oprsz, desc); 366db432672SRichard Henderson } 367db432672SRichard Henderson 368db432672SRichard Henderson void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) 369db432672SRichard Henderson { 370db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 371db432672SRichard Henderson intptr_t i; 372db432672SRichard Henderson 373db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 374db432672SRichard Henderson *(vec16 *)(d + i) = -*(vec16 *)(a + i); 375db432672SRichard Henderson } 376db432672SRichard Henderson clear_high(d, oprsz, desc); 377db432672SRichard Henderson } 378db432672SRichard Henderson 379db432672SRichard Henderson void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) 380db432672SRichard Henderson { 381db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 382db432672SRichard Henderson intptr_t i; 383db432672SRichard Henderson 384db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 385db432672SRichard Henderson *(vec32 *)(d + i) = -*(vec32 *)(a + i); 386db432672SRichard Henderson } 387db432672SRichard Henderson clear_high(d, oprsz, desc); 388db432672SRichard Henderson } 389db432672SRichard Henderson 390db432672SRichard Henderson void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) 391db432672SRichard Henderson { 392db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 393db432672SRichard Henderson intptr_t i; 394db432672SRichard Henderson 395db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 396db432672SRichard Henderson *(vec64 *)(d + i) = -*(vec64 *)(a + i); 397db432672SRichard Henderson } 398db432672SRichard Henderson clear_high(d, oprsz, desc); 399db432672SRichard Henderson } 400db432672SRichard Henderson 401bcefc902SRichard Henderson void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc) 402bcefc902SRichard Henderson { 403bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 404bcefc902SRichard Henderson intptr_t i; 405bcefc902SRichard Henderson 406bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 407bcefc902SRichard Henderson int8_t aa = *(int8_t *)(a + i); 408bcefc902SRichard Henderson *(int8_t *)(d + i) = aa < 0 ? -aa : aa; 409bcefc902SRichard Henderson } 410bcefc902SRichard Henderson clear_high(d, oprsz, desc); 411bcefc902SRichard Henderson } 412bcefc902SRichard Henderson 413bcefc902SRichard Henderson void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc) 414bcefc902SRichard Henderson { 415bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 416bcefc902SRichard Henderson intptr_t i; 417bcefc902SRichard Henderson 418bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 419bcefc902SRichard Henderson int16_t aa = *(int16_t *)(a + i); 420bcefc902SRichard Henderson *(int16_t *)(d + i) = aa < 0 ? -aa : aa; 421bcefc902SRichard Henderson } 422bcefc902SRichard Henderson clear_high(d, oprsz, desc); 423bcefc902SRichard Henderson } 424bcefc902SRichard Henderson 425bcefc902SRichard Henderson void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc) 426bcefc902SRichard Henderson { 427bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 428bcefc902SRichard Henderson intptr_t i; 429bcefc902SRichard Henderson 430bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 431bcefc902SRichard Henderson int32_t aa = *(int32_t *)(a + i); 432bcefc902SRichard Henderson *(int32_t *)(d + i) = aa < 0 ? -aa : aa; 433bcefc902SRichard Henderson } 434bcefc902SRichard Henderson clear_high(d, oprsz, desc); 435bcefc902SRichard Henderson } 436bcefc902SRichard Henderson 437bcefc902SRichard Henderson void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc) 438bcefc902SRichard Henderson { 439bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 440bcefc902SRichard Henderson intptr_t i; 441bcefc902SRichard Henderson 442bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 443bcefc902SRichard Henderson int64_t aa = *(int64_t *)(a + i); 444bcefc902SRichard Henderson *(int64_t *)(d + i) = aa < 0 ? -aa : aa; 445bcefc902SRichard Henderson } 446bcefc902SRichard Henderson clear_high(d, oprsz, desc); 447bcefc902SRichard Henderson } 448bcefc902SRichard Henderson 449db432672SRichard Henderson void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) 450db432672SRichard Henderson { 451db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 452db432672SRichard Henderson 453db432672SRichard Henderson memcpy(d, a, oprsz); 454db432672SRichard Henderson clear_high(d, oprsz, desc); 455db432672SRichard Henderson } 456db432672SRichard Henderson 457db432672SRichard Henderson void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) 458db432672SRichard Henderson { 459db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 460db432672SRichard Henderson intptr_t i; 461db432672SRichard Henderson 462db432672SRichard Henderson if (c == 0) { 463db432672SRichard Henderson oprsz = 0; 464db432672SRichard Henderson } else { 465db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 466db432672SRichard Henderson *(uint64_t *)(d + i) = c; 467db432672SRichard Henderson } 468db432672SRichard Henderson } 469db432672SRichard Henderson clear_high(d, oprsz, desc); 470db432672SRichard Henderson } 471db432672SRichard Henderson 472db432672SRichard Henderson void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) 473db432672SRichard Henderson { 474db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 475db432672SRichard Henderson intptr_t i; 476db432672SRichard Henderson 477db432672SRichard Henderson if (c == 0) { 478db432672SRichard Henderson oprsz = 0; 479db432672SRichard Henderson } else { 480db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 481db432672SRichard Henderson *(uint32_t *)(d + i) = c; 482db432672SRichard Henderson } 483db432672SRichard Henderson } 484db432672SRichard Henderson clear_high(d, oprsz, desc); 485db432672SRichard Henderson } 486db432672SRichard Henderson 487db432672SRichard Henderson void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) 488db432672SRichard Henderson { 489db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); 490db432672SRichard Henderson } 491db432672SRichard Henderson 492db432672SRichard Henderson void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) 493db432672SRichard Henderson { 494db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); 495db432672SRichard Henderson } 496db432672SRichard Henderson 497db432672SRichard Henderson void HELPER(gvec_not)(void *d, void *a, uint32_t desc) 498db432672SRichard Henderson { 499db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 500db432672SRichard Henderson intptr_t i; 501db432672SRichard Henderson 502db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 503db432672SRichard Henderson *(vec64 *)(d + i) = ~*(vec64 *)(a + i); 504db432672SRichard Henderson } 505db432672SRichard Henderson clear_high(d, oprsz, desc); 506db432672SRichard Henderson } 507db432672SRichard Henderson 508db432672SRichard Henderson void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) 509db432672SRichard Henderson { 510db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 511db432672SRichard Henderson intptr_t i; 512db432672SRichard Henderson 513db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 514db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i); 515db432672SRichard Henderson } 516db432672SRichard Henderson clear_high(d, oprsz, desc); 517db432672SRichard Henderson } 518db432672SRichard Henderson 519db432672SRichard Henderson void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) 520db432672SRichard Henderson { 521db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 522db432672SRichard Henderson intptr_t i; 523db432672SRichard Henderson 524db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 525db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i); 526db432672SRichard Henderson } 527db432672SRichard Henderson clear_high(d, oprsz, desc); 528db432672SRichard Henderson } 529db432672SRichard Henderson 530db432672SRichard Henderson void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) 531db432672SRichard Henderson { 532db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 533db432672SRichard Henderson intptr_t i; 534db432672SRichard Henderson 535db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 536db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i); 537db432672SRichard Henderson } 538db432672SRichard Henderson clear_high(d, oprsz, desc); 539db432672SRichard Henderson } 540db432672SRichard Henderson 541db432672SRichard Henderson void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) 542db432672SRichard Henderson { 543db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 544db432672SRichard Henderson intptr_t i; 545db432672SRichard Henderson 546db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 547db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i); 548db432672SRichard Henderson } 549db432672SRichard Henderson clear_high(d, oprsz, desc); 550db432672SRichard Henderson } 551db432672SRichard Henderson 552db432672SRichard Henderson void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) 553db432672SRichard Henderson { 554db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 555db432672SRichard Henderson intptr_t i; 556db432672SRichard Henderson 557db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 558db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i); 559db432672SRichard Henderson } 560db432672SRichard Henderson clear_high(d, oprsz, desc); 561db432672SRichard Henderson } 562d0ec9796SRichard Henderson 563f550805dSRichard Henderson void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc) 564f550805dSRichard Henderson { 565f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 566f550805dSRichard Henderson intptr_t i; 567f550805dSRichard Henderson 568f550805dSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 569f550805dSRichard Henderson *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) & *(vec64 *)(b + i)); 570f550805dSRichard Henderson } 571f550805dSRichard Henderson clear_high(d, oprsz, desc); 572f550805dSRichard Henderson } 573f550805dSRichard Henderson 574f550805dSRichard Henderson void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc) 575f550805dSRichard Henderson { 576f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 577f550805dSRichard Henderson intptr_t i; 578f550805dSRichard Henderson 579f550805dSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 580f550805dSRichard Henderson *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) | *(vec64 *)(b + i)); 581f550805dSRichard Henderson } 582f550805dSRichard Henderson clear_high(d, oprsz, desc); 583f550805dSRichard Henderson } 584f550805dSRichard Henderson 585f550805dSRichard Henderson void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc) 586f550805dSRichard Henderson { 587f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 588f550805dSRichard Henderson intptr_t i; 589f550805dSRichard Henderson 590f550805dSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 591f550805dSRichard Henderson *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) ^ *(vec64 *)(b + i)); 592f550805dSRichard Henderson } 593f550805dSRichard Henderson clear_high(d, oprsz, desc); 594f550805dSRichard Henderson } 595f550805dSRichard Henderson 59622fc3527SRichard Henderson void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc) 59722fc3527SRichard Henderson { 59822fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 59922fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 60022fc3527SRichard Henderson intptr_t i; 60122fc3527SRichard Henderson 60222fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 60322fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) & vecb; 60422fc3527SRichard Henderson } 60522fc3527SRichard Henderson clear_high(d, oprsz, desc); 60622fc3527SRichard Henderson } 60722fc3527SRichard Henderson 60822fc3527SRichard Henderson void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc) 60922fc3527SRichard Henderson { 61022fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 61122fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 61222fc3527SRichard Henderson intptr_t i; 61322fc3527SRichard Henderson 61422fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 61522fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb; 61622fc3527SRichard Henderson } 61722fc3527SRichard Henderson clear_high(d, oprsz, desc); 61822fc3527SRichard Henderson } 61922fc3527SRichard Henderson 62022fc3527SRichard Henderson void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc) 62122fc3527SRichard Henderson { 62222fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 62322fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 62422fc3527SRichard Henderson intptr_t i; 62522fc3527SRichard Henderson 62622fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 62722fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb; 62822fc3527SRichard Henderson } 62922fc3527SRichard Henderson clear_high(d, oprsz, desc); 63022fc3527SRichard Henderson } 63122fc3527SRichard Henderson 632d0ec9796SRichard Henderson void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) 633d0ec9796SRichard Henderson { 634d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 635d0ec9796SRichard Henderson int shift = simd_data(desc); 636d0ec9796SRichard Henderson intptr_t i; 637d0ec9796SRichard Henderson 638d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 639d0ec9796SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift; 640d0ec9796SRichard Henderson } 641d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 642d0ec9796SRichard Henderson } 643d0ec9796SRichard Henderson 644d0ec9796SRichard Henderson void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) 645d0ec9796SRichard Henderson { 646d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 647d0ec9796SRichard Henderson int shift = simd_data(desc); 648d0ec9796SRichard Henderson intptr_t i; 649d0ec9796SRichard Henderson 650d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 651d0ec9796SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift; 652d0ec9796SRichard Henderson } 653d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 654d0ec9796SRichard Henderson } 655d0ec9796SRichard Henderson 656d0ec9796SRichard Henderson void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) 657d0ec9796SRichard Henderson { 658d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 659d0ec9796SRichard Henderson int shift = simd_data(desc); 660d0ec9796SRichard Henderson intptr_t i; 661d0ec9796SRichard Henderson 662d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 663d0ec9796SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift; 664d0ec9796SRichard Henderson } 665d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 666d0ec9796SRichard Henderson } 667d0ec9796SRichard Henderson 668d0ec9796SRichard Henderson void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) 669d0ec9796SRichard Henderson { 670d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 671d0ec9796SRichard Henderson int shift = simd_data(desc); 672d0ec9796SRichard Henderson intptr_t i; 673d0ec9796SRichard Henderson 674d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 675d0ec9796SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift; 676d0ec9796SRichard Henderson } 677d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 678d0ec9796SRichard Henderson } 679d0ec9796SRichard Henderson 680d0ec9796SRichard Henderson void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc) 681d0ec9796SRichard Henderson { 682d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 683d0ec9796SRichard Henderson int shift = simd_data(desc); 684d0ec9796SRichard Henderson intptr_t i; 685d0ec9796SRichard Henderson 686d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 687d0ec9796SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift; 688d0ec9796SRichard Henderson } 689d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 690d0ec9796SRichard Henderson } 691d0ec9796SRichard Henderson 692d0ec9796SRichard Henderson void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) 693d0ec9796SRichard Henderson { 694d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 695d0ec9796SRichard Henderson int shift = simd_data(desc); 696d0ec9796SRichard Henderson intptr_t i; 697d0ec9796SRichard Henderson 698d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 699d0ec9796SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift; 700d0ec9796SRichard Henderson } 701d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 702d0ec9796SRichard Henderson } 703d0ec9796SRichard Henderson 704d0ec9796SRichard Henderson void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) 705d0ec9796SRichard Henderson { 706d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 707d0ec9796SRichard Henderson int shift = simd_data(desc); 708d0ec9796SRichard Henderson intptr_t i; 709d0ec9796SRichard Henderson 710d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 711d0ec9796SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift; 712d0ec9796SRichard Henderson } 713d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 714d0ec9796SRichard Henderson } 715d0ec9796SRichard Henderson 716d0ec9796SRichard Henderson void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) 717d0ec9796SRichard Henderson { 718d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 719d0ec9796SRichard Henderson int shift = simd_data(desc); 720d0ec9796SRichard Henderson intptr_t i; 721d0ec9796SRichard Henderson 722d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 723d0ec9796SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift; 724d0ec9796SRichard Henderson } 725d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 726d0ec9796SRichard Henderson } 727d0ec9796SRichard Henderson 728d0ec9796SRichard Henderson void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) 729d0ec9796SRichard Henderson { 730d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 731d0ec9796SRichard Henderson int shift = simd_data(desc); 732d0ec9796SRichard Henderson intptr_t i; 733d0ec9796SRichard Henderson 734d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 735d0ec9796SRichard Henderson *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift; 736d0ec9796SRichard Henderson } 737d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 738d0ec9796SRichard Henderson } 739d0ec9796SRichard Henderson 740d0ec9796SRichard Henderson void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) 741d0ec9796SRichard Henderson { 742d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 743d0ec9796SRichard Henderson int shift = simd_data(desc); 744d0ec9796SRichard Henderson intptr_t i; 745d0ec9796SRichard Henderson 746d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 747d0ec9796SRichard Henderson *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift; 748d0ec9796SRichard Henderson } 749d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 750d0ec9796SRichard Henderson } 751d0ec9796SRichard Henderson 752d0ec9796SRichard Henderson void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) 753d0ec9796SRichard Henderson { 754d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 755d0ec9796SRichard Henderson int shift = simd_data(desc); 756d0ec9796SRichard Henderson intptr_t i; 757d0ec9796SRichard Henderson 758d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 759d0ec9796SRichard Henderson *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift; 760d0ec9796SRichard Henderson } 761d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 762d0ec9796SRichard Henderson } 763d0ec9796SRichard Henderson 764d0ec9796SRichard Henderson void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) 765d0ec9796SRichard Henderson { 766d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 767d0ec9796SRichard Henderson int shift = simd_data(desc); 768d0ec9796SRichard Henderson intptr_t i; 769d0ec9796SRichard Henderson 770d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 771d0ec9796SRichard Henderson *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift; 772d0ec9796SRichard Henderson } 773d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 774d0ec9796SRichard Henderson } 775212be173SRichard Henderson 7765ee5c14cSRichard Henderson void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) 7775ee5c14cSRichard Henderson { 7785ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 7795ee5c14cSRichard Henderson intptr_t i; 7805ee5c14cSRichard Henderson 7815ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 7825ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 7835ee5c14cSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh; 7845ee5c14cSRichard Henderson } 7855ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 7865ee5c14cSRichard Henderson } 7875ee5c14cSRichard Henderson 7885ee5c14cSRichard Henderson void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc) 7895ee5c14cSRichard Henderson { 7905ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 7915ee5c14cSRichard Henderson intptr_t i; 7925ee5c14cSRichard Henderson 7935ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 7945ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 7955ee5c14cSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh; 7965ee5c14cSRichard Henderson } 7975ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 7985ee5c14cSRichard Henderson } 7995ee5c14cSRichard Henderson 8005ee5c14cSRichard Henderson void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc) 8015ee5c14cSRichard Henderson { 8025ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8035ee5c14cSRichard Henderson intptr_t i; 8045ee5c14cSRichard Henderson 8055ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 8065ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 8075ee5c14cSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh; 8085ee5c14cSRichard Henderson } 8095ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8105ee5c14cSRichard Henderson } 8115ee5c14cSRichard Henderson 8125ee5c14cSRichard Henderson void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc) 8135ee5c14cSRichard Henderson { 8145ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8155ee5c14cSRichard Henderson intptr_t i; 8165ee5c14cSRichard Henderson 8175ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 8185ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 8195ee5c14cSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh; 8205ee5c14cSRichard Henderson } 8215ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8225ee5c14cSRichard Henderson } 8235ee5c14cSRichard Henderson 8245ee5c14cSRichard Henderson void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc) 8255ee5c14cSRichard Henderson { 8265ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8275ee5c14cSRichard Henderson intptr_t i; 8285ee5c14cSRichard Henderson 8295ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 8305ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 8315ee5c14cSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh; 8325ee5c14cSRichard Henderson } 8335ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8345ee5c14cSRichard Henderson } 8355ee5c14cSRichard Henderson 8365ee5c14cSRichard Henderson void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc) 8375ee5c14cSRichard Henderson { 8385ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8395ee5c14cSRichard Henderson intptr_t i; 8405ee5c14cSRichard Henderson 8415ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 8425ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 8435ee5c14cSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh; 8445ee5c14cSRichard Henderson } 8455ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8465ee5c14cSRichard Henderson } 8475ee5c14cSRichard Henderson 8485ee5c14cSRichard Henderson void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc) 8495ee5c14cSRichard Henderson { 8505ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8515ee5c14cSRichard Henderson intptr_t i; 8525ee5c14cSRichard Henderson 8535ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 8545ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 8555ee5c14cSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh; 8565ee5c14cSRichard Henderson } 8575ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8585ee5c14cSRichard Henderson } 8595ee5c14cSRichard Henderson 8605ee5c14cSRichard Henderson void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc) 8615ee5c14cSRichard Henderson { 8625ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8635ee5c14cSRichard Henderson intptr_t i; 8645ee5c14cSRichard Henderson 8655ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 8665ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 8675ee5c14cSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh; 8685ee5c14cSRichard Henderson } 8695ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8705ee5c14cSRichard Henderson } 8715ee5c14cSRichard Henderson 8725ee5c14cSRichard Henderson void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc) 8735ee5c14cSRichard Henderson { 8745ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8755ee5c14cSRichard Henderson intptr_t i; 8765ee5c14cSRichard Henderson 877*899f08adSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 8785ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 8795ee5c14cSRichard Henderson *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh; 8805ee5c14cSRichard Henderson } 8815ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8825ee5c14cSRichard Henderson } 8835ee5c14cSRichard Henderson 8845ee5c14cSRichard Henderson void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc) 8855ee5c14cSRichard Henderson { 8865ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8875ee5c14cSRichard Henderson intptr_t i; 8885ee5c14cSRichard Henderson 8895ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 8905ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 8915ee5c14cSRichard Henderson *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh; 8925ee5c14cSRichard Henderson } 8935ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8945ee5c14cSRichard Henderson } 8955ee5c14cSRichard Henderson 8965ee5c14cSRichard Henderson void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc) 8975ee5c14cSRichard Henderson { 8985ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8995ee5c14cSRichard Henderson intptr_t i; 9005ee5c14cSRichard Henderson 901*899f08adSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 9025ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 9035ee5c14cSRichard Henderson *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh; 9045ee5c14cSRichard Henderson } 9055ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 9065ee5c14cSRichard Henderson } 9075ee5c14cSRichard Henderson 9085ee5c14cSRichard Henderson void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) 9095ee5c14cSRichard Henderson { 9105ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 9115ee5c14cSRichard Henderson intptr_t i; 9125ee5c14cSRichard Henderson 913*899f08adSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 9145ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 9155ee5c14cSRichard Henderson *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh; 9165ee5c14cSRichard Henderson } 9175ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 9185ee5c14cSRichard Henderson } 9195ee5c14cSRichard Henderson 920212be173SRichard Henderson /* If vectors are enabled, the compiler fills in -1 for true. 921212be173SRichard Henderson Otherwise, we must take care of this by hand. */ 922212be173SRichard Henderson #ifdef CONFIG_VECTOR16 923212be173SRichard Henderson # define DO_CMP0(X) X 924212be173SRichard Henderson #else 925212be173SRichard Henderson # define DO_CMP0(X) -(X) 926212be173SRichard Henderson #endif 927212be173SRichard Henderson 928212be173SRichard Henderson #define DO_CMP1(NAME, TYPE, OP) \ 929212be173SRichard Henderson void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ 930212be173SRichard Henderson { \ 931212be173SRichard Henderson intptr_t oprsz = simd_oprsz(desc); \ 932212be173SRichard Henderson intptr_t i; \ 9336cb1d3b8SRichard Henderson for (i = 0; i < oprsz; i += sizeof(TYPE)) { \ 934212be173SRichard Henderson *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \ 935212be173SRichard Henderson } \ 936212be173SRichard Henderson clear_high(d, oprsz, desc); \ 937212be173SRichard Henderson } 938212be173SRichard Henderson 939212be173SRichard Henderson #define DO_CMP2(SZ) \ 940212be173SRichard Henderson DO_CMP1(gvec_eq##SZ, vec##SZ, ==) \ 941212be173SRichard Henderson DO_CMP1(gvec_ne##SZ, vec##SZ, !=) \ 942212be173SRichard Henderson DO_CMP1(gvec_lt##SZ, svec##SZ, <) \ 943212be173SRichard Henderson DO_CMP1(gvec_le##SZ, svec##SZ, <=) \ 944212be173SRichard Henderson DO_CMP1(gvec_ltu##SZ, vec##SZ, <) \ 945212be173SRichard Henderson DO_CMP1(gvec_leu##SZ, vec##SZ, <=) 946212be173SRichard Henderson 947212be173SRichard Henderson DO_CMP2(8) 948212be173SRichard Henderson DO_CMP2(16) 949212be173SRichard Henderson DO_CMP2(32) 950212be173SRichard Henderson DO_CMP2(64) 951212be173SRichard Henderson 952212be173SRichard Henderson #undef DO_CMP0 953212be173SRichard Henderson #undef DO_CMP1 954212be173SRichard Henderson #undef DO_CMP2 955f49b12c6SRichard Henderson 956f49b12c6SRichard Henderson void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc) 957f49b12c6SRichard Henderson { 958f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 959f49b12c6SRichard Henderson intptr_t i; 960f49b12c6SRichard Henderson 961f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 962f49b12c6SRichard Henderson int r = *(int8_t *)(a + i) + *(int8_t *)(b + i); 963f49b12c6SRichard Henderson if (r > INT8_MAX) { 964f49b12c6SRichard Henderson r = INT8_MAX; 965f49b12c6SRichard Henderson } else if (r < INT8_MIN) { 966f49b12c6SRichard Henderson r = INT8_MIN; 967f49b12c6SRichard Henderson } 968f49b12c6SRichard Henderson *(int8_t *)(d + i) = r; 969f49b12c6SRichard Henderson } 970f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 971f49b12c6SRichard Henderson } 972f49b12c6SRichard Henderson 973f49b12c6SRichard Henderson void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc) 974f49b12c6SRichard Henderson { 975f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 976f49b12c6SRichard Henderson intptr_t i; 977f49b12c6SRichard Henderson 978f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 979f49b12c6SRichard Henderson int r = *(int16_t *)(a + i) + *(int16_t *)(b + i); 980f49b12c6SRichard Henderson if (r > INT16_MAX) { 981f49b12c6SRichard Henderson r = INT16_MAX; 982f49b12c6SRichard Henderson } else if (r < INT16_MIN) { 983f49b12c6SRichard Henderson r = INT16_MIN; 984f49b12c6SRichard Henderson } 985f49b12c6SRichard Henderson *(int16_t *)(d + i) = r; 986f49b12c6SRichard Henderson } 987f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 988f49b12c6SRichard Henderson } 989f49b12c6SRichard Henderson 990f49b12c6SRichard Henderson void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc) 991f49b12c6SRichard Henderson { 992f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 993f49b12c6SRichard Henderson intptr_t i; 994f49b12c6SRichard Henderson 995f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 996f49b12c6SRichard Henderson int32_t ai = *(int32_t *)(a + i); 997f49b12c6SRichard Henderson int32_t bi = *(int32_t *)(b + i); 998f49b12c6SRichard Henderson int32_t di = ai + bi; 999f49b12c6SRichard Henderson if (((di ^ ai) &~ (ai ^ bi)) < 0) { 1000f49b12c6SRichard Henderson /* Signed overflow. */ 1001f49b12c6SRichard Henderson di = (di < 0 ? INT32_MAX : INT32_MIN); 1002f49b12c6SRichard Henderson } 1003f49b12c6SRichard Henderson *(int32_t *)(d + i) = di; 1004f49b12c6SRichard Henderson } 1005f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1006f49b12c6SRichard Henderson } 1007f49b12c6SRichard Henderson 1008f49b12c6SRichard Henderson void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc) 1009f49b12c6SRichard Henderson { 1010f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1011f49b12c6SRichard Henderson intptr_t i; 1012f49b12c6SRichard Henderson 1013f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1014f49b12c6SRichard Henderson int64_t ai = *(int64_t *)(a + i); 1015f49b12c6SRichard Henderson int64_t bi = *(int64_t *)(b + i); 1016f49b12c6SRichard Henderson int64_t di = ai + bi; 1017f49b12c6SRichard Henderson if (((di ^ ai) &~ (ai ^ bi)) < 0) { 1018f49b12c6SRichard Henderson /* Signed overflow. */ 1019f49b12c6SRichard Henderson di = (di < 0 ? INT64_MAX : INT64_MIN); 1020f49b12c6SRichard Henderson } 1021f49b12c6SRichard Henderson *(int64_t *)(d + i) = di; 1022f49b12c6SRichard Henderson } 1023f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1024f49b12c6SRichard Henderson } 1025f49b12c6SRichard Henderson 1026f49b12c6SRichard Henderson void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc) 1027f49b12c6SRichard Henderson { 1028f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1029f49b12c6SRichard Henderson intptr_t i; 1030f49b12c6SRichard Henderson 1031f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1032f49b12c6SRichard Henderson int r = *(int8_t *)(a + i) - *(int8_t *)(b + i); 1033f49b12c6SRichard Henderson if (r > INT8_MAX) { 1034f49b12c6SRichard Henderson r = INT8_MAX; 1035f49b12c6SRichard Henderson } else if (r < INT8_MIN) { 1036f49b12c6SRichard Henderson r = INT8_MIN; 1037f49b12c6SRichard Henderson } 1038f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 1039f49b12c6SRichard Henderson } 1040f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1041f49b12c6SRichard Henderson } 1042f49b12c6SRichard Henderson 1043f49b12c6SRichard Henderson void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc) 1044f49b12c6SRichard Henderson { 1045f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1046f49b12c6SRichard Henderson intptr_t i; 1047f49b12c6SRichard Henderson 1048f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1049f49b12c6SRichard Henderson int r = *(int16_t *)(a + i) - *(int16_t *)(b + i); 1050f49b12c6SRichard Henderson if (r > INT16_MAX) { 1051f49b12c6SRichard Henderson r = INT16_MAX; 1052f49b12c6SRichard Henderson } else if (r < INT16_MIN) { 1053f49b12c6SRichard Henderson r = INT16_MIN; 1054f49b12c6SRichard Henderson } 1055f49b12c6SRichard Henderson *(int16_t *)(d + i) = r; 1056f49b12c6SRichard Henderson } 1057f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1058f49b12c6SRichard Henderson } 1059f49b12c6SRichard Henderson 1060f49b12c6SRichard Henderson void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc) 1061f49b12c6SRichard Henderson { 1062f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1063f49b12c6SRichard Henderson intptr_t i; 1064f49b12c6SRichard Henderson 1065f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1066f49b12c6SRichard Henderson int32_t ai = *(int32_t *)(a + i); 1067f49b12c6SRichard Henderson int32_t bi = *(int32_t *)(b + i); 1068f49b12c6SRichard Henderson int32_t di = ai - bi; 1069f49b12c6SRichard Henderson if (((di ^ ai) & (ai ^ bi)) < 0) { 1070f49b12c6SRichard Henderson /* Signed overflow. */ 1071f49b12c6SRichard Henderson di = (di < 0 ? INT32_MAX : INT32_MIN); 1072f49b12c6SRichard Henderson } 1073f49b12c6SRichard Henderson *(int32_t *)(d + i) = di; 1074f49b12c6SRichard Henderson } 1075f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1076f49b12c6SRichard Henderson } 1077f49b12c6SRichard Henderson 1078f49b12c6SRichard Henderson void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc) 1079f49b12c6SRichard Henderson { 1080f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1081f49b12c6SRichard Henderson intptr_t i; 1082f49b12c6SRichard Henderson 1083f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1084f49b12c6SRichard Henderson int64_t ai = *(int64_t *)(a + i); 1085f49b12c6SRichard Henderson int64_t bi = *(int64_t *)(b + i); 1086f49b12c6SRichard Henderson int64_t di = ai - bi; 1087f49b12c6SRichard Henderson if (((di ^ ai) & (ai ^ bi)) < 0) { 1088f49b12c6SRichard Henderson /* Signed overflow. */ 1089f49b12c6SRichard Henderson di = (di < 0 ? INT64_MAX : INT64_MIN); 1090f49b12c6SRichard Henderson } 1091f49b12c6SRichard Henderson *(int64_t *)(d + i) = di; 1092f49b12c6SRichard Henderson } 1093f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1094f49b12c6SRichard Henderson } 1095f49b12c6SRichard Henderson 1096f49b12c6SRichard Henderson void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc) 1097f49b12c6SRichard Henderson { 1098f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1099f49b12c6SRichard Henderson intptr_t i; 1100f49b12c6SRichard Henderson 1101f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1102f49b12c6SRichard Henderson unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 1103f49b12c6SRichard Henderson if (r > UINT8_MAX) { 1104f49b12c6SRichard Henderson r = UINT8_MAX; 1105f49b12c6SRichard Henderson } 1106f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 1107f49b12c6SRichard Henderson } 1108f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1109f49b12c6SRichard Henderson } 1110f49b12c6SRichard Henderson 1111f49b12c6SRichard Henderson void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc) 1112f49b12c6SRichard Henderson { 1113f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1114f49b12c6SRichard Henderson intptr_t i; 1115f49b12c6SRichard Henderson 1116f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1117f49b12c6SRichard Henderson unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 1118f49b12c6SRichard Henderson if (r > UINT16_MAX) { 1119f49b12c6SRichard Henderson r = UINT16_MAX; 1120f49b12c6SRichard Henderson } 1121f49b12c6SRichard Henderson *(uint16_t *)(d + i) = r; 1122f49b12c6SRichard Henderson } 1123f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1124f49b12c6SRichard Henderson } 1125f49b12c6SRichard Henderson 1126f49b12c6SRichard Henderson void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc) 1127f49b12c6SRichard Henderson { 1128f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1129f49b12c6SRichard Henderson intptr_t i; 1130f49b12c6SRichard Henderson 1131f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1132f49b12c6SRichard Henderson uint32_t ai = *(uint32_t *)(a + i); 1133f49b12c6SRichard Henderson uint32_t bi = *(uint32_t *)(b + i); 1134f49b12c6SRichard Henderson uint32_t di = ai + bi; 1135f49b12c6SRichard Henderson if (di < ai) { 1136f49b12c6SRichard Henderson di = UINT32_MAX; 1137f49b12c6SRichard Henderson } 1138f49b12c6SRichard Henderson *(uint32_t *)(d + i) = di; 1139f49b12c6SRichard Henderson } 1140f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1141f49b12c6SRichard Henderson } 1142f49b12c6SRichard Henderson 1143f49b12c6SRichard Henderson void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc) 1144f49b12c6SRichard Henderson { 1145f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1146f49b12c6SRichard Henderson intptr_t i; 1147f49b12c6SRichard Henderson 1148f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1149f49b12c6SRichard Henderson uint64_t ai = *(uint64_t *)(a + i); 1150f49b12c6SRichard Henderson uint64_t bi = *(uint64_t *)(b + i); 1151f49b12c6SRichard Henderson uint64_t di = ai + bi; 1152f49b12c6SRichard Henderson if (di < ai) { 1153f49b12c6SRichard Henderson di = UINT64_MAX; 1154f49b12c6SRichard Henderson } 1155f49b12c6SRichard Henderson *(uint64_t *)(d + i) = di; 1156f49b12c6SRichard Henderson } 1157f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1158f49b12c6SRichard Henderson } 1159f49b12c6SRichard Henderson 1160f49b12c6SRichard Henderson void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc) 1161f49b12c6SRichard Henderson { 1162f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1163f49b12c6SRichard Henderson intptr_t i; 1164f49b12c6SRichard Henderson 1165f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1166f49b12c6SRichard Henderson int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 1167f49b12c6SRichard Henderson if (r < 0) { 1168f49b12c6SRichard Henderson r = 0; 1169f49b12c6SRichard Henderson } 1170f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 1171f49b12c6SRichard Henderson } 1172f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1173f49b12c6SRichard Henderson } 1174f49b12c6SRichard Henderson 1175f49b12c6SRichard Henderson void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc) 1176f49b12c6SRichard Henderson { 1177f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1178f49b12c6SRichard Henderson intptr_t i; 1179f49b12c6SRichard Henderson 1180f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1181f49b12c6SRichard Henderson int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 1182f49b12c6SRichard Henderson if (r < 0) { 1183f49b12c6SRichard Henderson r = 0; 1184f49b12c6SRichard Henderson } 1185f49b12c6SRichard Henderson *(uint16_t *)(d + i) = r; 1186f49b12c6SRichard Henderson } 1187f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1188f49b12c6SRichard Henderson } 1189f49b12c6SRichard Henderson 1190f49b12c6SRichard Henderson void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc) 1191f49b12c6SRichard Henderson { 1192f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1193f49b12c6SRichard Henderson intptr_t i; 1194f49b12c6SRichard Henderson 1195f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1196f49b12c6SRichard Henderson uint32_t ai = *(uint32_t *)(a + i); 1197f49b12c6SRichard Henderson uint32_t bi = *(uint32_t *)(b + i); 1198f49b12c6SRichard Henderson uint32_t di = ai - bi; 1199f49b12c6SRichard Henderson if (ai < bi) { 1200f49b12c6SRichard Henderson di = 0; 1201f49b12c6SRichard Henderson } 1202f49b12c6SRichard Henderson *(uint32_t *)(d + i) = di; 1203f49b12c6SRichard Henderson } 1204f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1205f49b12c6SRichard Henderson } 1206f49b12c6SRichard Henderson 1207f49b12c6SRichard Henderson void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc) 1208f49b12c6SRichard Henderson { 1209f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1210f49b12c6SRichard Henderson intptr_t i; 1211f49b12c6SRichard Henderson 1212f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1213f49b12c6SRichard Henderson uint64_t ai = *(uint64_t *)(a + i); 1214f49b12c6SRichard Henderson uint64_t bi = *(uint64_t *)(b + i); 1215f49b12c6SRichard Henderson uint64_t di = ai - bi; 1216f49b12c6SRichard Henderson if (ai < bi) { 1217f49b12c6SRichard Henderson di = 0; 1218f49b12c6SRichard Henderson } 1219f49b12c6SRichard Henderson *(uint64_t *)(d + i) = di; 1220f49b12c6SRichard Henderson } 1221f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1222f49b12c6SRichard Henderson } 1223dd0a0fcdSRichard Henderson 1224dd0a0fcdSRichard Henderson void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc) 1225dd0a0fcdSRichard Henderson { 1226dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1227dd0a0fcdSRichard Henderson intptr_t i; 1228dd0a0fcdSRichard Henderson 1229dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1230dd0a0fcdSRichard Henderson int8_t aa = *(int8_t *)(a + i); 1231dd0a0fcdSRichard Henderson int8_t bb = *(int8_t *)(b + i); 1232dd0a0fcdSRichard Henderson int8_t dd = aa < bb ? aa : bb; 1233dd0a0fcdSRichard Henderson *(int8_t *)(d + i) = dd; 1234dd0a0fcdSRichard Henderson } 1235dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1236dd0a0fcdSRichard Henderson } 1237dd0a0fcdSRichard Henderson 1238dd0a0fcdSRichard Henderson void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc) 1239dd0a0fcdSRichard Henderson { 1240dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1241dd0a0fcdSRichard Henderson intptr_t i; 1242dd0a0fcdSRichard Henderson 1243dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1244dd0a0fcdSRichard Henderson int16_t aa = *(int16_t *)(a + i); 1245dd0a0fcdSRichard Henderson int16_t bb = *(int16_t *)(b + i); 1246dd0a0fcdSRichard Henderson int16_t dd = aa < bb ? aa : bb; 1247dd0a0fcdSRichard Henderson *(int16_t *)(d + i) = dd; 1248dd0a0fcdSRichard Henderson } 1249dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1250dd0a0fcdSRichard Henderson } 1251dd0a0fcdSRichard Henderson 1252dd0a0fcdSRichard Henderson void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc) 1253dd0a0fcdSRichard Henderson { 1254dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1255dd0a0fcdSRichard Henderson intptr_t i; 1256dd0a0fcdSRichard Henderson 1257dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1258dd0a0fcdSRichard Henderson int32_t aa = *(int32_t *)(a + i); 1259dd0a0fcdSRichard Henderson int32_t bb = *(int32_t *)(b + i); 1260dd0a0fcdSRichard Henderson int32_t dd = aa < bb ? aa : bb; 1261dd0a0fcdSRichard Henderson *(int32_t *)(d + i) = dd; 1262dd0a0fcdSRichard Henderson } 1263dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1264dd0a0fcdSRichard Henderson } 1265dd0a0fcdSRichard Henderson 1266dd0a0fcdSRichard Henderson void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc) 1267dd0a0fcdSRichard Henderson { 1268dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1269dd0a0fcdSRichard Henderson intptr_t i; 1270dd0a0fcdSRichard Henderson 1271dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1272dd0a0fcdSRichard Henderson int64_t aa = *(int64_t *)(a + i); 1273dd0a0fcdSRichard Henderson int64_t bb = *(int64_t *)(b + i); 1274dd0a0fcdSRichard Henderson int64_t dd = aa < bb ? aa : bb; 1275dd0a0fcdSRichard Henderson *(int64_t *)(d + i) = dd; 1276dd0a0fcdSRichard Henderson } 1277dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1278dd0a0fcdSRichard Henderson } 1279dd0a0fcdSRichard Henderson 1280dd0a0fcdSRichard Henderson void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc) 1281dd0a0fcdSRichard Henderson { 1282dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1283dd0a0fcdSRichard Henderson intptr_t i; 1284dd0a0fcdSRichard Henderson 1285dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1286dd0a0fcdSRichard Henderson int8_t aa = *(int8_t *)(a + i); 1287dd0a0fcdSRichard Henderson int8_t bb = *(int8_t *)(b + i); 1288dd0a0fcdSRichard Henderson int8_t dd = aa > bb ? aa : bb; 1289dd0a0fcdSRichard Henderson *(int8_t *)(d + i) = dd; 1290dd0a0fcdSRichard Henderson } 1291dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1292dd0a0fcdSRichard Henderson } 1293dd0a0fcdSRichard Henderson 1294dd0a0fcdSRichard Henderson void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc) 1295dd0a0fcdSRichard Henderson { 1296dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1297dd0a0fcdSRichard Henderson intptr_t i; 1298dd0a0fcdSRichard Henderson 1299dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1300dd0a0fcdSRichard Henderson int16_t aa = *(int16_t *)(a + i); 1301dd0a0fcdSRichard Henderson int16_t bb = *(int16_t *)(b + i); 1302dd0a0fcdSRichard Henderson int16_t dd = aa > bb ? aa : bb; 1303dd0a0fcdSRichard Henderson *(int16_t *)(d + i) = dd; 1304dd0a0fcdSRichard Henderson } 1305dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1306dd0a0fcdSRichard Henderson } 1307dd0a0fcdSRichard Henderson 1308dd0a0fcdSRichard Henderson void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc) 1309dd0a0fcdSRichard Henderson { 1310dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1311dd0a0fcdSRichard Henderson intptr_t i; 1312dd0a0fcdSRichard Henderson 1313dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1314dd0a0fcdSRichard Henderson int32_t aa = *(int32_t *)(a + i); 1315dd0a0fcdSRichard Henderson int32_t bb = *(int32_t *)(b + i); 1316dd0a0fcdSRichard Henderson int32_t dd = aa > bb ? aa : bb; 1317dd0a0fcdSRichard Henderson *(int32_t *)(d + i) = dd; 1318dd0a0fcdSRichard Henderson } 1319dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1320dd0a0fcdSRichard Henderson } 1321dd0a0fcdSRichard Henderson 1322dd0a0fcdSRichard Henderson void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc) 1323dd0a0fcdSRichard Henderson { 1324dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1325dd0a0fcdSRichard Henderson intptr_t i; 1326dd0a0fcdSRichard Henderson 1327dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1328dd0a0fcdSRichard Henderson int64_t aa = *(int64_t *)(a + i); 1329dd0a0fcdSRichard Henderson int64_t bb = *(int64_t *)(b + i); 1330dd0a0fcdSRichard Henderson int64_t dd = aa > bb ? aa : bb; 1331dd0a0fcdSRichard Henderson *(int64_t *)(d + i) = dd; 1332dd0a0fcdSRichard Henderson } 1333dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1334dd0a0fcdSRichard Henderson } 1335dd0a0fcdSRichard Henderson 1336dd0a0fcdSRichard Henderson void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc) 1337dd0a0fcdSRichard Henderson { 1338dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1339dd0a0fcdSRichard Henderson intptr_t i; 1340dd0a0fcdSRichard Henderson 1341dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1342dd0a0fcdSRichard Henderson uint8_t aa = *(uint8_t *)(a + i); 1343dd0a0fcdSRichard Henderson uint8_t bb = *(uint8_t *)(b + i); 1344dd0a0fcdSRichard Henderson uint8_t dd = aa < bb ? aa : bb; 1345dd0a0fcdSRichard Henderson *(uint8_t *)(d + i) = dd; 1346dd0a0fcdSRichard Henderson } 1347dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1348dd0a0fcdSRichard Henderson } 1349dd0a0fcdSRichard Henderson 1350dd0a0fcdSRichard Henderson void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc) 1351dd0a0fcdSRichard Henderson { 1352dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1353dd0a0fcdSRichard Henderson intptr_t i; 1354dd0a0fcdSRichard Henderson 1355dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1356dd0a0fcdSRichard Henderson uint16_t aa = *(uint16_t *)(a + i); 1357dd0a0fcdSRichard Henderson uint16_t bb = *(uint16_t *)(b + i); 1358dd0a0fcdSRichard Henderson uint16_t dd = aa < bb ? aa : bb; 1359dd0a0fcdSRichard Henderson *(uint16_t *)(d + i) = dd; 1360dd0a0fcdSRichard Henderson } 1361dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1362dd0a0fcdSRichard Henderson } 1363dd0a0fcdSRichard Henderson 1364dd0a0fcdSRichard Henderson void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc) 1365dd0a0fcdSRichard Henderson { 1366dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1367dd0a0fcdSRichard Henderson intptr_t i; 1368dd0a0fcdSRichard Henderson 1369dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1370dd0a0fcdSRichard Henderson uint32_t aa = *(uint32_t *)(a + i); 1371dd0a0fcdSRichard Henderson uint32_t bb = *(uint32_t *)(b + i); 1372dd0a0fcdSRichard Henderson uint32_t dd = aa < bb ? aa : bb; 1373dd0a0fcdSRichard Henderson *(uint32_t *)(d + i) = dd; 1374dd0a0fcdSRichard Henderson } 1375dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1376dd0a0fcdSRichard Henderson } 1377dd0a0fcdSRichard Henderson 1378dd0a0fcdSRichard Henderson void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc) 1379dd0a0fcdSRichard Henderson { 1380dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1381dd0a0fcdSRichard Henderson intptr_t i; 1382dd0a0fcdSRichard Henderson 1383dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1384dd0a0fcdSRichard Henderson uint64_t aa = *(uint64_t *)(a + i); 1385dd0a0fcdSRichard Henderson uint64_t bb = *(uint64_t *)(b + i); 1386dd0a0fcdSRichard Henderson uint64_t dd = aa < bb ? aa : bb; 1387dd0a0fcdSRichard Henderson *(uint64_t *)(d + i) = dd; 1388dd0a0fcdSRichard Henderson } 1389dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1390dd0a0fcdSRichard Henderson } 1391dd0a0fcdSRichard Henderson 1392dd0a0fcdSRichard Henderson void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc) 1393dd0a0fcdSRichard Henderson { 1394dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1395dd0a0fcdSRichard Henderson intptr_t i; 1396dd0a0fcdSRichard Henderson 1397dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1398dd0a0fcdSRichard Henderson uint8_t aa = *(uint8_t *)(a + i); 1399dd0a0fcdSRichard Henderson uint8_t bb = *(uint8_t *)(b + i); 1400dd0a0fcdSRichard Henderson uint8_t dd = aa > bb ? aa : bb; 1401dd0a0fcdSRichard Henderson *(uint8_t *)(d + i) = dd; 1402dd0a0fcdSRichard Henderson } 1403dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1404dd0a0fcdSRichard Henderson } 1405dd0a0fcdSRichard Henderson 1406dd0a0fcdSRichard Henderson void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc) 1407dd0a0fcdSRichard Henderson { 1408dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1409dd0a0fcdSRichard Henderson intptr_t i; 1410dd0a0fcdSRichard Henderson 1411dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1412dd0a0fcdSRichard Henderson uint16_t aa = *(uint16_t *)(a + i); 1413dd0a0fcdSRichard Henderson uint16_t bb = *(uint16_t *)(b + i); 1414dd0a0fcdSRichard Henderson uint16_t dd = aa > bb ? aa : bb; 1415dd0a0fcdSRichard Henderson *(uint16_t *)(d + i) = dd; 1416dd0a0fcdSRichard Henderson } 1417dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1418dd0a0fcdSRichard Henderson } 1419dd0a0fcdSRichard Henderson 1420dd0a0fcdSRichard Henderson void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc) 1421dd0a0fcdSRichard Henderson { 1422dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1423dd0a0fcdSRichard Henderson intptr_t i; 1424dd0a0fcdSRichard Henderson 1425dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1426dd0a0fcdSRichard Henderson uint32_t aa = *(uint32_t *)(a + i); 1427dd0a0fcdSRichard Henderson uint32_t bb = *(uint32_t *)(b + i); 1428dd0a0fcdSRichard Henderson uint32_t dd = aa > bb ? aa : bb; 1429dd0a0fcdSRichard Henderson *(uint32_t *)(d + i) = dd; 1430dd0a0fcdSRichard Henderson } 1431dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1432dd0a0fcdSRichard Henderson } 1433dd0a0fcdSRichard Henderson 1434dd0a0fcdSRichard Henderson void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc) 1435dd0a0fcdSRichard Henderson { 1436dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1437dd0a0fcdSRichard Henderson intptr_t i; 1438dd0a0fcdSRichard Henderson 1439dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1440dd0a0fcdSRichard Henderson uint64_t aa = *(uint64_t *)(a + i); 1441dd0a0fcdSRichard Henderson uint64_t bb = *(uint64_t *)(b + i); 1442dd0a0fcdSRichard Henderson uint64_t dd = aa > bb ? aa : bb; 1443dd0a0fcdSRichard Henderson *(uint64_t *)(d + i) = dd; 1444dd0a0fcdSRichard Henderson } 1445dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1446dd0a0fcdSRichard Henderson } 144738dc1294SRichard Henderson 144838dc1294SRichard Henderson void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc) 144938dc1294SRichard Henderson { 145038dc1294SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 145138dc1294SRichard Henderson intptr_t i; 145238dc1294SRichard Henderson 145338dc1294SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 145438dc1294SRichard Henderson vec64 aa = *(vec64 *)(a + i); 145538dc1294SRichard Henderson vec64 bb = *(vec64 *)(b + i); 145638dc1294SRichard Henderson vec64 cc = *(vec64 *)(c + i); 145738dc1294SRichard Henderson *(vec64 *)(d + i) = (bb & aa) | (cc & ~aa); 145838dc1294SRichard Henderson } 145938dc1294SRichard Henderson clear_high(d, oprsz, desc); 146038dc1294SRichard Henderson } 1461