1db432672SRichard Henderson /* 2db432672SRichard Henderson * Generic vectorized operation runtime 3db432672SRichard Henderson * 4db432672SRichard Henderson * Copyright (c) 2018 Linaro 5db432672SRichard Henderson * 6db432672SRichard Henderson * This library is free software; you can redistribute it and/or 7db432672SRichard Henderson * modify it under the terms of the GNU Lesser General Public 8db432672SRichard Henderson * License as published by the Free Software Foundation; either 9fb0343d5SThomas Huth * version 2.1 of the License, or (at your option) any later version. 10db432672SRichard Henderson * 11db432672SRichard Henderson * This library is distributed in the hope that it will be useful, 12db432672SRichard Henderson * but WITHOUT ANY WARRANTY; without even the implied warranty of 13db432672SRichard Henderson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14db432672SRichard Henderson * Lesser General Public License for more details. 15db432672SRichard Henderson * 16db432672SRichard Henderson * You should have received a copy of the GNU Lesser General Public 17db432672SRichard Henderson * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18db432672SRichard Henderson */ 19db432672SRichard Henderson 20db432672SRichard Henderson #include "qemu/osdep.h" 21db432672SRichard Henderson #include "qemu/host-utils.h" 22db432672SRichard Henderson #include "cpu.h" 23db432672SRichard Henderson #include "exec/helper-proto.h" 24db432672SRichard Henderson #include "tcg-gvec-desc.h" 25db432672SRichard Henderson 26db432672SRichard Henderson 27db432672SRichard Henderson /* Virtually all hosts support 16-byte vectors. Those that don't can emulate 28db432672SRichard Henderson * them via GCC's generic vector extension. This turns out to be simpler and 29db432672SRichard Henderson * more reliable than getting the compiler to autovectorize. 30db432672SRichard Henderson * 31db432672SRichard Henderson * In tcg-op-gvec.c, we asserted that both the size and alignment of the data 32db432672SRichard Henderson * are multiples of 16. 33db432672SRichard Henderson * 34db432672SRichard Henderson * When the compiler does not support all of the operations we require, the 35db432672SRichard Henderson * loops are written so that we can always fall back on the base types. 36db432672SRichard Henderson */ 37db432672SRichard Henderson #ifdef CONFIG_VECTOR16 38db432672SRichard Henderson typedef uint8_t vec8 __attribute__((vector_size(16))); 39db432672SRichard Henderson typedef uint16_t vec16 __attribute__((vector_size(16))); 40db432672SRichard Henderson typedef uint32_t vec32 __attribute__((vector_size(16))); 41db432672SRichard Henderson typedef uint64_t vec64 __attribute__((vector_size(16))); 42db432672SRichard Henderson 43db432672SRichard Henderson typedef int8_t svec8 __attribute__((vector_size(16))); 44db432672SRichard Henderson typedef int16_t svec16 __attribute__((vector_size(16))); 45db432672SRichard Henderson typedef int32_t svec32 __attribute__((vector_size(16))); 46db432672SRichard Henderson typedef int64_t svec64 __attribute__((vector_size(16))); 47db432672SRichard Henderson 48db432672SRichard Henderson #define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X } 49db432672SRichard Henderson #define DUP8(X) { X, X, X, X, X, X, X, X } 50db432672SRichard Henderson #define DUP4(X) { X, X, X, X } 51db432672SRichard Henderson #define DUP2(X) { X, X } 52db432672SRichard Henderson #else 53db432672SRichard Henderson typedef uint8_t vec8; 54db432672SRichard Henderson typedef uint16_t vec16; 55db432672SRichard Henderson typedef uint32_t vec32; 56db432672SRichard Henderson typedef uint64_t vec64; 57db432672SRichard Henderson 58db432672SRichard Henderson typedef int8_t svec8; 59db432672SRichard Henderson typedef int16_t svec16; 60db432672SRichard Henderson typedef int32_t svec32; 61db432672SRichard Henderson typedef int64_t svec64; 62db432672SRichard Henderson 63db432672SRichard Henderson #define DUP16(X) X 64db432672SRichard Henderson #define DUP8(X) X 65db432672SRichard Henderson #define DUP4(X) X 66db432672SRichard Henderson #define DUP2(X) X 67db432672SRichard Henderson #endif /* CONFIG_VECTOR16 */ 68db432672SRichard Henderson 69db432672SRichard Henderson static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc) 70db432672SRichard Henderson { 71db432672SRichard Henderson intptr_t maxsz = simd_maxsz(desc); 72db432672SRichard Henderson intptr_t i; 73db432672SRichard Henderson 74db432672SRichard Henderson if (unlikely(maxsz > oprsz)) { 75db432672SRichard Henderson for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) { 76db432672SRichard Henderson *(uint64_t *)(d + i) = 0; 77db432672SRichard Henderson } 78db432672SRichard Henderson } 79db432672SRichard Henderson } 80db432672SRichard Henderson 81db432672SRichard Henderson void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc) 82db432672SRichard Henderson { 83db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 84db432672SRichard Henderson intptr_t i; 85db432672SRichard Henderson 86db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 87db432672SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i); 88db432672SRichard Henderson } 89db432672SRichard Henderson clear_high(d, oprsz, desc); 90db432672SRichard Henderson } 91db432672SRichard Henderson 92db432672SRichard Henderson void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc) 93db432672SRichard Henderson { 94db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 95db432672SRichard Henderson intptr_t i; 96db432672SRichard Henderson 97db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 98db432672SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i); 99db432672SRichard Henderson } 100db432672SRichard Henderson clear_high(d, oprsz, desc); 101db432672SRichard Henderson } 102db432672SRichard Henderson 103db432672SRichard Henderson void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc) 104db432672SRichard Henderson { 105db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 106db432672SRichard Henderson intptr_t i; 107db432672SRichard Henderson 108db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 109db432672SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i); 110db432672SRichard Henderson } 111db432672SRichard Henderson clear_high(d, oprsz, desc); 112db432672SRichard Henderson } 113db432672SRichard Henderson 114db432672SRichard Henderson void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc) 115db432672SRichard Henderson { 116db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 117db432672SRichard Henderson intptr_t i; 118db432672SRichard Henderson 119db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 120db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i); 121db432672SRichard Henderson } 122db432672SRichard Henderson clear_high(d, oprsz, desc); 123db432672SRichard Henderson } 124db432672SRichard Henderson 12522fc3527SRichard Henderson void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc) 12622fc3527SRichard Henderson { 12722fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 12822fc3527SRichard Henderson vec8 vecb = (vec8)DUP16(b); 12922fc3527SRichard Henderson intptr_t i; 13022fc3527SRichard Henderson 13122fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 13222fc3527SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb; 13322fc3527SRichard Henderson } 13422fc3527SRichard Henderson clear_high(d, oprsz, desc); 13522fc3527SRichard Henderson } 13622fc3527SRichard Henderson 13722fc3527SRichard Henderson void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc) 13822fc3527SRichard Henderson { 13922fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 14022fc3527SRichard Henderson vec16 vecb = (vec16)DUP8(b); 14122fc3527SRichard Henderson intptr_t i; 14222fc3527SRichard Henderson 14322fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 14422fc3527SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb; 14522fc3527SRichard Henderson } 14622fc3527SRichard Henderson clear_high(d, oprsz, desc); 14722fc3527SRichard Henderson } 14822fc3527SRichard Henderson 14922fc3527SRichard Henderson void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc) 15022fc3527SRichard Henderson { 15122fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 15222fc3527SRichard Henderson vec32 vecb = (vec32)DUP4(b); 15322fc3527SRichard Henderson intptr_t i; 15422fc3527SRichard Henderson 15522fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 15622fc3527SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb; 15722fc3527SRichard Henderson } 15822fc3527SRichard Henderson clear_high(d, oprsz, desc); 15922fc3527SRichard Henderson } 16022fc3527SRichard Henderson 16122fc3527SRichard Henderson void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc) 16222fc3527SRichard Henderson { 16322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 16422fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 16522fc3527SRichard Henderson intptr_t i; 16622fc3527SRichard Henderson 16722fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 16822fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb; 16922fc3527SRichard Henderson } 17022fc3527SRichard Henderson clear_high(d, oprsz, desc); 17122fc3527SRichard Henderson } 17222fc3527SRichard Henderson 173db432672SRichard Henderson void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc) 174db432672SRichard Henderson { 175db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 176db432672SRichard Henderson intptr_t i; 177db432672SRichard Henderson 178db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 179db432672SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i); 180db432672SRichard Henderson } 181db432672SRichard Henderson clear_high(d, oprsz, desc); 182db432672SRichard Henderson } 183db432672SRichard Henderson 184db432672SRichard Henderson void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc) 185db432672SRichard Henderson { 186db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 187db432672SRichard Henderson intptr_t i; 188db432672SRichard Henderson 189db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 190db432672SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i); 191db432672SRichard Henderson } 192db432672SRichard Henderson clear_high(d, oprsz, desc); 193db432672SRichard Henderson } 194db432672SRichard Henderson 195db432672SRichard Henderson void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc) 196db432672SRichard Henderson { 197db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 198db432672SRichard Henderson intptr_t i; 199db432672SRichard Henderson 200db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 201db432672SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i); 202db432672SRichard Henderson } 203db432672SRichard Henderson clear_high(d, oprsz, desc); 204db432672SRichard Henderson } 205db432672SRichard Henderson 206db432672SRichard Henderson void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc) 207db432672SRichard Henderson { 208db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 209db432672SRichard Henderson intptr_t i; 210db432672SRichard Henderson 211db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 212db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i); 213db432672SRichard Henderson } 214db432672SRichard Henderson clear_high(d, oprsz, desc); 215db432672SRichard Henderson } 216db432672SRichard Henderson 21722fc3527SRichard Henderson void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc) 21822fc3527SRichard Henderson { 21922fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 22022fc3527SRichard Henderson vec8 vecb = (vec8)DUP16(b); 22122fc3527SRichard Henderson intptr_t i; 22222fc3527SRichard Henderson 22322fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 22422fc3527SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb; 22522fc3527SRichard Henderson } 22622fc3527SRichard Henderson clear_high(d, oprsz, desc); 22722fc3527SRichard Henderson } 22822fc3527SRichard Henderson 22922fc3527SRichard Henderson void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc) 23022fc3527SRichard Henderson { 23122fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 23222fc3527SRichard Henderson vec16 vecb = (vec16)DUP8(b); 23322fc3527SRichard Henderson intptr_t i; 23422fc3527SRichard Henderson 23522fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 23622fc3527SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb; 23722fc3527SRichard Henderson } 23822fc3527SRichard Henderson clear_high(d, oprsz, desc); 23922fc3527SRichard Henderson } 24022fc3527SRichard Henderson 24122fc3527SRichard Henderson void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc) 24222fc3527SRichard Henderson { 24322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 24422fc3527SRichard Henderson vec32 vecb = (vec32)DUP4(b); 24522fc3527SRichard Henderson intptr_t i; 24622fc3527SRichard Henderson 24722fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 24822fc3527SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb; 24922fc3527SRichard Henderson } 25022fc3527SRichard Henderson clear_high(d, oprsz, desc); 25122fc3527SRichard Henderson } 25222fc3527SRichard Henderson 25322fc3527SRichard Henderson void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc) 25422fc3527SRichard Henderson { 25522fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 25622fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 25722fc3527SRichard Henderson intptr_t i; 25822fc3527SRichard Henderson 25922fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 26022fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb; 26122fc3527SRichard Henderson } 26222fc3527SRichard Henderson clear_high(d, oprsz, desc); 26322fc3527SRichard Henderson } 26422fc3527SRichard Henderson 2653774030aSRichard Henderson void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc) 2663774030aSRichard Henderson { 2673774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2683774030aSRichard Henderson intptr_t i; 2693774030aSRichard Henderson 2703774030aSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 2713774030aSRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i); 2723774030aSRichard Henderson } 2733774030aSRichard Henderson clear_high(d, oprsz, desc); 2743774030aSRichard Henderson } 2753774030aSRichard Henderson 2763774030aSRichard Henderson void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc) 2773774030aSRichard Henderson { 2783774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2793774030aSRichard Henderson intptr_t i; 2803774030aSRichard Henderson 2813774030aSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 2823774030aSRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i); 2833774030aSRichard Henderson } 2843774030aSRichard Henderson clear_high(d, oprsz, desc); 2853774030aSRichard Henderson } 2863774030aSRichard Henderson 2873774030aSRichard Henderson void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc) 2883774030aSRichard Henderson { 2893774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2903774030aSRichard Henderson intptr_t i; 2913774030aSRichard Henderson 2923774030aSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 2933774030aSRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i); 2943774030aSRichard Henderson } 2953774030aSRichard Henderson clear_high(d, oprsz, desc); 2963774030aSRichard Henderson } 2973774030aSRichard Henderson 2983774030aSRichard Henderson void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc) 2993774030aSRichard Henderson { 3003774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 3013774030aSRichard Henderson intptr_t i; 3023774030aSRichard Henderson 3033774030aSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 3043774030aSRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i); 3053774030aSRichard Henderson } 3063774030aSRichard Henderson clear_high(d, oprsz, desc); 3073774030aSRichard Henderson } 3083774030aSRichard Henderson 30922fc3527SRichard Henderson void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc) 31022fc3527SRichard Henderson { 31122fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 31222fc3527SRichard Henderson vec8 vecb = (vec8)DUP16(b); 31322fc3527SRichard Henderson intptr_t i; 31422fc3527SRichard Henderson 31522fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 31622fc3527SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb; 31722fc3527SRichard Henderson } 31822fc3527SRichard Henderson clear_high(d, oprsz, desc); 31922fc3527SRichard Henderson } 32022fc3527SRichard Henderson 32122fc3527SRichard Henderson void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc) 32222fc3527SRichard Henderson { 32322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 32422fc3527SRichard Henderson vec16 vecb = (vec16)DUP8(b); 32522fc3527SRichard Henderson intptr_t i; 32622fc3527SRichard Henderson 32722fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 32822fc3527SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb; 32922fc3527SRichard Henderson } 33022fc3527SRichard Henderson clear_high(d, oprsz, desc); 33122fc3527SRichard Henderson } 33222fc3527SRichard Henderson 33322fc3527SRichard Henderson void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc) 33422fc3527SRichard Henderson { 33522fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 33622fc3527SRichard Henderson vec32 vecb = (vec32)DUP4(b); 33722fc3527SRichard Henderson intptr_t i; 33822fc3527SRichard Henderson 33922fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 34022fc3527SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb; 34122fc3527SRichard Henderson } 34222fc3527SRichard Henderson clear_high(d, oprsz, desc); 34322fc3527SRichard Henderson } 34422fc3527SRichard Henderson 34522fc3527SRichard Henderson void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc) 34622fc3527SRichard Henderson { 34722fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 34822fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 34922fc3527SRichard Henderson intptr_t i; 35022fc3527SRichard Henderson 35122fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 35222fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb; 35322fc3527SRichard Henderson } 35422fc3527SRichard Henderson clear_high(d, oprsz, desc); 35522fc3527SRichard Henderson } 35622fc3527SRichard Henderson 357db432672SRichard Henderson void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) 358db432672SRichard Henderson { 359db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 360db432672SRichard Henderson intptr_t i; 361db432672SRichard Henderson 362db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 363db432672SRichard Henderson *(vec8 *)(d + i) = -*(vec8 *)(a + i); 364db432672SRichard Henderson } 365db432672SRichard Henderson clear_high(d, oprsz, desc); 366db432672SRichard Henderson } 367db432672SRichard Henderson 368db432672SRichard Henderson void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) 369db432672SRichard Henderson { 370db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 371db432672SRichard Henderson intptr_t i; 372db432672SRichard Henderson 373db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 374db432672SRichard Henderson *(vec16 *)(d + i) = -*(vec16 *)(a + i); 375db432672SRichard Henderson } 376db432672SRichard Henderson clear_high(d, oprsz, desc); 377db432672SRichard Henderson } 378db432672SRichard Henderson 379db432672SRichard Henderson void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) 380db432672SRichard Henderson { 381db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 382db432672SRichard Henderson intptr_t i; 383db432672SRichard Henderson 384db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 385db432672SRichard Henderson *(vec32 *)(d + i) = -*(vec32 *)(a + i); 386db432672SRichard Henderson } 387db432672SRichard Henderson clear_high(d, oprsz, desc); 388db432672SRichard Henderson } 389db432672SRichard Henderson 390db432672SRichard Henderson void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) 391db432672SRichard Henderson { 392db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 393db432672SRichard Henderson intptr_t i; 394db432672SRichard Henderson 395db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 396db432672SRichard Henderson *(vec64 *)(d + i) = -*(vec64 *)(a + i); 397db432672SRichard Henderson } 398db432672SRichard Henderson clear_high(d, oprsz, desc); 399db432672SRichard Henderson } 400db432672SRichard Henderson 401db432672SRichard Henderson void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) 402db432672SRichard Henderson { 403db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 404db432672SRichard Henderson 405db432672SRichard Henderson memcpy(d, a, oprsz); 406db432672SRichard Henderson clear_high(d, oprsz, desc); 407db432672SRichard Henderson } 408db432672SRichard Henderson 409db432672SRichard Henderson void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) 410db432672SRichard Henderson { 411db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 412db432672SRichard Henderson intptr_t i; 413db432672SRichard Henderson 414db432672SRichard Henderson if (c == 0) { 415db432672SRichard Henderson oprsz = 0; 416db432672SRichard Henderson } else { 417db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 418db432672SRichard Henderson *(uint64_t *)(d + i) = c; 419db432672SRichard Henderson } 420db432672SRichard Henderson } 421db432672SRichard Henderson clear_high(d, oprsz, desc); 422db432672SRichard Henderson } 423db432672SRichard Henderson 424db432672SRichard Henderson void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) 425db432672SRichard Henderson { 426db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 427db432672SRichard Henderson intptr_t i; 428db432672SRichard Henderson 429db432672SRichard Henderson if (c == 0) { 430db432672SRichard Henderson oprsz = 0; 431db432672SRichard Henderson } else { 432db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 433db432672SRichard Henderson *(uint32_t *)(d + i) = c; 434db432672SRichard Henderson } 435db432672SRichard Henderson } 436db432672SRichard Henderson clear_high(d, oprsz, desc); 437db432672SRichard Henderson } 438db432672SRichard Henderson 439db432672SRichard Henderson void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) 440db432672SRichard Henderson { 441db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); 442db432672SRichard Henderson } 443db432672SRichard Henderson 444db432672SRichard Henderson void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) 445db432672SRichard Henderson { 446db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); 447db432672SRichard Henderson } 448db432672SRichard Henderson 449db432672SRichard Henderson void HELPER(gvec_not)(void *d, void *a, uint32_t desc) 450db432672SRichard Henderson { 451db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 452db432672SRichard Henderson intptr_t i; 453db432672SRichard Henderson 454db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 455db432672SRichard Henderson *(vec64 *)(d + i) = ~*(vec64 *)(a + i); 456db432672SRichard Henderson } 457db432672SRichard Henderson clear_high(d, oprsz, desc); 458db432672SRichard Henderson } 459db432672SRichard Henderson 460db432672SRichard Henderson void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) 461db432672SRichard Henderson { 462db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 463db432672SRichard Henderson intptr_t i; 464db432672SRichard Henderson 465db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 466db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i); 467db432672SRichard Henderson } 468db432672SRichard Henderson clear_high(d, oprsz, desc); 469db432672SRichard Henderson } 470db432672SRichard Henderson 471db432672SRichard Henderson void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) 472db432672SRichard Henderson { 473db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 474db432672SRichard Henderson intptr_t i; 475db432672SRichard Henderson 476db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 477db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i); 478db432672SRichard Henderson } 479db432672SRichard Henderson clear_high(d, oprsz, desc); 480db432672SRichard Henderson } 481db432672SRichard Henderson 482db432672SRichard Henderson void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) 483db432672SRichard Henderson { 484db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 485db432672SRichard Henderson intptr_t i; 486db432672SRichard Henderson 487db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 488db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i); 489db432672SRichard Henderson } 490db432672SRichard Henderson clear_high(d, oprsz, desc); 491db432672SRichard Henderson } 492db432672SRichard Henderson 493db432672SRichard Henderson void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) 494db432672SRichard Henderson { 495db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 496db432672SRichard Henderson intptr_t i; 497db432672SRichard Henderson 498db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 499db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i); 500db432672SRichard Henderson } 501db432672SRichard Henderson clear_high(d, oprsz, desc); 502db432672SRichard Henderson } 503db432672SRichard Henderson 504db432672SRichard Henderson void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) 505db432672SRichard Henderson { 506db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 507db432672SRichard Henderson intptr_t i; 508db432672SRichard Henderson 509db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 510db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i); 511db432672SRichard Henderson } 512db432672SRichard Henderson clear_high(d, oprsz, desc); 513db432672SRichard Henderson } 514d0ec9796SRichard Henderson 515f550805dSRichard Henderson void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc) 516f550805dSRichard Henderson { 517f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 518f550805dSRichard Henderson intptr_t i; 519f550805dSRichard Henderson 520f550805dSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 521f550805dSRichard Henderson *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) & *(vec64 *)(b + i)); 522f550805dSRichard Henderson } 523f550805dSRichard Henderson clear_high(d, oprsz, desc); 524f550805dSRichard Henderson } 525f550805dSRichard Henderson 526f550805dSRichard Henderson void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc) 527f550805dSRichard Henderson { 528f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 529f550805dSRichard Henderson intptr_t i; 530f550805dSRichard Henderson 531f550805dSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 532f550805dSRichard Henderson *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) | *(vec64 *)(b + i)); 533f550805dSRichard Henderson } 534f550805dSRichard Henderson clear_high(d, oprsz, desc); 535f550805dSRichard Henderson } 536f550805dSRichard Henderson 537f550805dSRichard Henderson void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc) 538f550805dSRichard Henderson { 539f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 540f550805dSRichard Henderson intptr_t i; 541f550805dSRichard Henderson 542f550805dSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 543f550805dSRichard Henderson *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) ^ *(vec64 *)(b + i)); 544f550805dSRichard Henderson } 545f550805dSRichard Henderson clear_high(d, oprsz, desc); 546f550805dSRichard Henderson } 547f550805dSRichard Henderson 54822fc3527SRichard Henderson void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc) 54922fc3527SRichard Henderson { 55022fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 55122fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 55222fc3527SRichard Henderson intptr_t i; 55322fc3527SRichard Henderson 55422fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 55522fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) & vecb; 55622fc3527SRichard Henderson } 55722fc3527SRichard Henderson clear_high(d, oprsz, desc); 55822fc3527SRichard Henderson } 55922fc3527SRichard Henderson 56022fc3527SRichard Henderson void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc) 56122fc3527SRichard Henderson { 56222fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 56322fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 56422fc3527SRichard Henderson intptr_t i; 56522fc3527SRichard Henderson 56622fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 56722fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb; 56822fc3527SRichard Henderson } 56922fc3527SRichard Henderson clear_high(d, oprsz, desc); 57022fc3527SRichard Henderson } 57122fc3527SRichard Henderson 57222fc3527SRichard Henderson void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc) 57322fc3527SRichard Henderson { 57422fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 57522fc3527SRichard Henderson vec64 vecb = (vec64)DUP2(b); 57622fc3527SRichard Henderson intptr_t i; 57722fc3527SRichard Henderson 57822fc3527SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 57922fc3527SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb; 58022fc3527SRichard Henderson } 58122fc3527SRichard Henderson clear_high(d, oprsz, desc); 58222fc3527SRichard Henderson } 58322fc3527SRichard Henderson 584d0ec9796SRichard Henderson void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) 585d0ec9796SRichard Henderson { 586d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 587d0ec9796SRichard Henderson int shift = simd_data(desc); 588d0ec9796SRichard Henderson intptr_t i; 589d0ec9796SRichard Henderson 590d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 591d0ec9796SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift; 592d0ec9796SRichard Henderson } 593d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 594d0ec9796SRichard Henderson } 595d0ec9796SRichard Henderson 596d0ec9796SRichard Henderson void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) 597d0ec9796SRichard Henderson { 598d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 599d0ec9796SRichard Henderson int shift = simd_data(desc); 600d0ec9796SRichard Henderson intptr_t i; 601d0ec9796SRichard Henderson 602d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 603d0ec9796SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift; 604d0ec9796SRichard Henderson } 605d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 606d0ec9796SRichard Henderson } 607d0ec9796SRichard Henderson 608d0ec9796SRichard Henderson void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) 609d0ec9796SRichard Henderson { 610d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 611d0ec9796SRichard Henderson int shift = simd_data(desc); 612d0ec9796SRichard Henderson intptr_t i; 613d0ec9796SRichard Henderson 614d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 615d0ec9796SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift; 616d0ec9796SRichard Henderson } 617d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 618d0ec9796SRichard Henderson } 619d0ec9796SRichard Henderson 620d0ec9796SRichard Henderson void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) 621d0ec9796SRichard Henderson { 622d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 623d0ec9796SRichard Henderson int shift = simd_data(desc); 624d0ec9796SRichard Henderson intptr_t i; 625d0ec9796SRichard Henderson 626d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 627d0ec9796SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift; 628d0ec9796SRichard Henderson } 629d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 630d0ec9796SRichard Henderson } 631d0ec9796SRichard Henderson 632d0ec9796SRichard Henderson void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc) 633d0ec9796SRichard Henderson { 634d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 635d0ec9796SRichard Henderson int shift = simd_data(desc); 636d0ec9796SRichard Henderson intptr_t i; 637d0ec9796SRichard Henderson 638d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 639d0ec9796SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift; 640d0ec9796SRichard Henderson } 641d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 642d0ec9796SRichard Henderson } 643d0ec9796SRichard Henderson 644d0ec9796SRichard Henderson void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) 645d0ec9796SRichard Henderson { 646d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 647d0ec9796SRichard Henderson int shift = simd_data(desc); 648d0ec9796SRichard Henderson intptr_t i; 649d0ec9796SRichard Henderson 650d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 651d0ec9796SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift; 652d0ec9796SRichard Henderson } 653d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 654d0ec9796SRichard Henderson } 655d0ec9796SRichard Henderson 656d0ec9796SRichard Henderson void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) 657d0ec9796SRichard Henderson { 658d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 659d0ec9796SRichard Henderson int shift = simd_data(desc); 660d0ec9796SRichard Henderson intptr_t i; 661d0ec9796SRichard Henderson 662d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 663d0ec9796SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift; 664d0ec9796SRichard Henderson } 665d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 666d0ec9796SRichard Henderson } 667d0ec9796SRichard Henderson 668d0ec9796SRichard Henderson void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) 669d0ec9796SRichard Henderson { 670d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 671d0ec9796SRichard Henderson int shift = simd_data(desc); 672d0ec9796SRichard Henderson intptr_t i; 673d0ec9796SRichard Henderson 674d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 675d0ec9796SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift; 676d0ec9796SRichard Henderson } 677d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 678d0ec9796SRichard Henderson } 679d0ec9796SRichard Henderson 680d0ec9796SRichard Henderson void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) 681d0ec9796SRichard Henderson { 682d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 683d0ec9796SRichard Henderson int shift = simd_data(desc); 684d0ec9796SRichard Henderson intptr_t i; 685d0ec9796SRichard Henderson 686d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 687d0ec9796SRichard Henderson *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift; 688d0ec9796SRichard Henderson } 689d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 690d0ec9796SRichard Henderson } 691d0ec9796SRichard Henderson 692d0ec9796SRichard Henderson void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) 693d0ec9796SRichard Henderson { 694d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 695d0ec9796SRichard Henderson int shift = simd_data(desc); 696d0ec9796SRichard Henderson intptr_t i; 697d0ec9796SRichard Henderson 698d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 699d0ec9796SRichard Henderson *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift; 700d0ec9796SRichard Henderson } 701d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 702d0ec9796SRichard Henderson } 703d0ec9796SRichard Henderson 704d0ec9796SRichard Henderson void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) 705d0ec9796SRichard Henderson { 706d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 707d0ec9796SRichard Henderson int shift = simd_data(desc); 708d0ec9796SRichard Henderson intptr_t i; 709d0ec9796SRichard Henderson 710d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 711d0ec9796SRichard Henderson *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift; 712d0ec9796SRichard Henderson } 713d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 714d0ec9796SRichard Henderson } 715d0ec9796SRichard Henderson 716d0ec9796SRichard Henderson void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) 717d0ec9796SRichard Henderson { 718d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 719d0ec9796SRichard Henderson int shift = simd_data(desc); 720d0ec9796SRichard Henderson intptr_t i; 721d0ec9796SRichard Henderson 722d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 723d0ec9796SRichard Henderson *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift; 724d0ec9796SRichard Henderson } 725d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 726d0ec9796SRichard Henderson } 727212be173SRichard Henderson 728*5ee5c14cSRichard Henderson void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) 729*5ee5c14cSRichard Henderson { 730*5ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 731*5ee5c14cSRichard Henderson intptr_t i; 732*5ee5c14cSRichard Henderson 733*5ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 734*5ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 735*5ee5c14cSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh; 736*5ee5c14cSRichard Henderson } 737*5ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 738*5ee5c14cSRichard Henderson } 739*5ee5c14cSRichard Henderson 740*5ee5c14cSRichard Henderson void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc) 741*5ee5c14cSRichard Henderson { 742*5ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 743*5ee5c14cSRichard Henderson intptr_t i; 744*5ee5c14cSRichard Henderson 745*5ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 746*5ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 747*5ee5c14cSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh; 748*5ee5c14cSRichard Henderson } 749*5ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 750*5ee5c14cSRichard Henderson } 751*5ee5c14cSRichard Henderson 752*5ee5c14cSRichard Henderson void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc) 753*5ee5c14cSRichard Henderson { 754*5ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 755*5ee5c14cSRichard Henderson intptr_t i; 756*5ee5c14cSRichard Henderson 757*5ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 758*5ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 759*5ee5c14cSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh; 760*5ee5c14cSRichard Henderson } 761*5ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 762*5ee5c14cSRichard Henderson } 763*5ee5c14cSRichard Henderson 764*5ee5c14cSRichard Henderson void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc) 765*5ee5c14cSRichard Henderson { 766*5ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 767*5ee5c14cSRichard Henderson intptr_t i; 768*5ee5c14cSRichard Henderson 769*5ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 770*5ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 771*5ee5c14cSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh; 772*5ee5c14cSRichard Henderson } 773*5ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 774*5ee5c14cSRichard Henderson } 775*5ee5c14cSRichard Henderson 776*5ee5c14cSRichard Henderson void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc) 777*5ee5c14cSRichard Henderson { 778*5ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 779*5ee5c14cSRichard Henderson intptr_t i; 780*5ee5c14cSRichard Henderson 781*5ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 782*5ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 783*5ee5c14cSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh; 784*5ee5c14cSRichard Henderson } 785*5ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 786*5ee5c14cSRichard Henderson } 787*5ee5c14cSRichard Henderson 788*5ee5c14cSRichard Henderson void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc) 789*5ee5c14cSRichard Henderson { 790*5ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 791*5ee5c14cSRichard Henderson intptr_t i; 792*5ee5c14cSRichard Henderson 793*5ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 794*5ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 795*5ee5c14cSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh; 796*5ee5c14cSRichard Henderson } 797*5ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 798*5ee5c14cSRichard Henderson } 799*5ee5c14cSRichard Henderson 800*5ee5c14cSRichard Henderson void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc) 801*5ee5c14cSRichard Henderson { 802*5ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 803*5ee5c14cSRichard Henderson intptr_t i; 804*5ee5c14cSRichard Henderson 805*5ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 806*5ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 807*5ee5c14cSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh; 808*5ee5c14cSRichard Henderson } 809*5ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 810*5ee5c14cSRichard Henderson } 811*5ee5c14cSRichard Henderson 812*5ee5c14cSRichard Henderson void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc) 813*5ee5c14cSRichard Henderson { 814*5ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 815*5ee5c14cSRichard Henderson intptr_t i; 816*5ee5c14cSRichard Henderson 817*5ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 818*5ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 819*5ee5c14cSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh; 820*5ee5c14cSRichard Henderson } 821*5ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 822*5ee5c14cSRichard Henderson } 823*5ee5c14cSRichard Henderson 824*5ee5c14cSRichard Henderson void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc) 825*5ee5c14cSRichard Henderson { 826*5ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 827*5ee5c14cSRichard Henderson intptr_t i; 828*5ee5c14cSRichard Henderson 829*5ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 830*5ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 831*5ee5c14cSRichard Henderson *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh; 832*5ee5c14cSRichard Henderson } 833*5ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 834*5ee5c14cSRichard Henderson } 835*5ee5c14cSRichard Henderson 836*5ee5c14cSRichard Henderson void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc) 837*5ee5c14cSRichard Henderson { 838*5ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 839*5ee5c14cSRichard Henderson intptr_t i; 840*5ee5c14cSRichard Henderson 841*5ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 842*5ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 843*5ee5c14cSRichard Henderson *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh; 844*5ee5c14cSRichard Henderson } 845*5ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 846*5ee5c14cSRichard Henderson } 847*5ee5c14cSRichard Henderson 848*5ee5c14cSRichard Henderson void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc) 849*5ee5c14cSRichard Henderson { 850*5ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 851*5ee5c14cSRichard Henderson intptr_t i; 852*5ee5c14cSRichard Henderson 853*5ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 854*5ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 855*5ee5c14cSRichard Henderson *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh; 856*5ee5c14cSRichard Henderson } 857*5ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 858*5ee5c14cSRichard Henderson } 859*5ee5c14cSRichard Henderson 860*5ee5c14cSRichard Henderson void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) 861*5ee5c14cSRichard Henderson { 862*5ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 863*5ee5c14cSRichard Henderson intptr_t i; 864*5ee5c14cSRichard Henderson 865*5ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 866*5ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 867*5ee5c14cSRichard Henderson *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh; 868*5ee5c14cSRichard Henderson } 869*5ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 870*5ee5c14cSRichard Henderson } 871*5ee5c14cSRichard Henderson 872212be173SRichard Henderson /* If vectors are enabled, the compiler fills in -1 for true. 873212be173SRichard Henderson Otherwise, we must take care of this by hand. */ 874212be173SRichard Henderson #ifdef CONFIG_VECTOR16 875212be173SRichard Henderson # define DO_CMP0(X) X 876212be173SRichard Henderson #else 877212be173SRichard Henderson # define DO_CMP0(X) -(X) 878212be173SRichard Henderson #endif 879212be173SRichard Henderson 880212be173SRichard Henderson #define DO_CMP1(NAME, TYPE, OP) \ 881212be173SRichard Henderson void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ 882212be173SRichard Henderson { \ 883212be173SRichard Henderson intptr_t oprsz = simd_oprsz(desc); \ 884212be173SRichard Henderson intptr_t i; \ 8856cb1d3b8SRichard Henderson for (i = 0; i < oprsz; i += sizeof(TYPE)) { \ 886212be173SRichard Henderson *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \ 887212be173SRichard Henderson } \ 888212be173SRichard Henderson clear_high(d, oprsz, desc); \ 889212be173SRichard Henderson } 890212be173SRichard Henderson 891212be173SRichard Henderson #define DO_CMP2(SZ) \ 892212be173SRichard Henderson DO_CMP1(gvec_eq##SZ, vec##SZ, ==) \ 893212be173SRichard Henderson DO_CMP1(gvec_ne##SZ, vec##SZ, !=) \ 894212be173SRichard Henderson DO_CMP1(gvec_lt##SZ, svec##SZ, <) \ 895212be173SRichard Henderson DO_CMP1(gvec_le##SZ, svec##SZ, <=) \ 896212be173SRichard Henderson DO_CMP1(gvec_ltu##SZ, vec##SZ, <) \ 897212be173SRichard Henderson DO_CMP1(gvec_leu##SZ, vec##SZ, <=) 898212be173SRichard Henderson 899212be173SRichard Henderson DO_CMP2(8) 900212be173SRichard Henderson DO_CMP2(16) 901212be173SRichard Henderson DO_CMP2(32) 902212be173SRichard Henderson DO_CMP2(64) 903212be173SRichard Henderson 904212be173SRichard Henderson #undef DO_CMP0 905212be173SRichard Henderson #undef DO_CMP1 906212be173SRichard Henderson #undef DO_CMP2 907f49b12c6SRichard Henderson 908f49b12c6SRichard Henderson void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc) 909f49b12c6SRichard Henderson { 910f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 911f49b12c6SRichard Henderson intptr_t i; 912f49b12c6SRichard Henderson 913f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 914f49b12c6SRichard Henderson int r = *(int8_t *)(a + i) + *(int8_t *)(b + i); 915f49b12c6SRichard Henderson if (r > INT8_MAX) { 916f49b12c6SRichard Henderson r = INT8_MAX; 917f49b12c6SRichard Henderson } else if (r < INT8_MIN) { 918f49b12c6SRichard Henderson r = INT8_MIN; 919f49b12c6SRichard Henderson } 920f49b12c6SRichard Henderson *(int8_t *)(d + i) = r; 921f49b12c6SRichard Henderson } 922f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 923f49b12c6SRichard Henderson } 924f49b12c6SRichard Henderson 925f49b12c6SRichard Henderson void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc) 926f49b12c6SRichard Henderson { 927f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 928f49b12c6SRichard Henderson intptr_t i; 929f49b12c6SRichard Henderson 930f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 931f49b12c6SRichard Henderson int r = *(int16_t *)(a + i) + *(int16_t *)(b + i); 932f49b12c6SRichard Henderson if (r > INT16_MAX) { 933f49b12c6SRichard Henderson r = INT16_MAX; 934f49b12c6SRichard Henderson } else if (r < INT16_MIN) { 935f49b12c6SRichard Henderson r = INT16_MIN; 936f49b12c6SRichard Henderson } 937f49b12c6SRichard Henderson *(int16_t *)(d + i) = r; 938f49b12c6SRichard Henderson } 939f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 940f49b12c6SRichard Henderson } 941f49b12c6SRichard Henderson 942f49b12c6SRichard Henderson void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc) 943f49b12c6SRichard Henderson { 944f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 945f49b12c6SRichard Henderson intptr_t i; 946f49b12c6SRichard Henderson 947f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 948f49b12c6SRichard Henderson int32_t ai = *(int32_t *)(a + i); 949f49b12c6SRichard Henderson int32_t bi = *(int32_t *)(b + i); 950f49b12c6SRichard Henderson int32_t di = ai + bi; 951f49b12c6SRichard Henderson if (((di ^ ai) &~ (ai ^ bi)) < 0) { 952f49b12c6SRichard Henderson /* Signed overflow. */ 953f49b12c6SRichard Henderson di = (di < 0 ? INT32_MAX : INT32_MIN); 954f49b12c6SRichard Henderson } 955f49b12c6SRichard Henderson *(int32_t *)(d + i) = di; 956f49b12c6SRichard Henderson } 957f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 958f49b12c6SRichard Henderson } 959f49b12c6SRichard Henderson 960f49b12c6SRichard Henderson void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc) 961f49b12c6SRichard Henderson { 962f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 963f49b12c6SRichard Henderson intptr_t i; 964f49b12c6SRichard Henderson 965f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 966f49b12c6SRichard Henderson int64_t ai = *(int64_t *)(a + i); 967f49b12c6SRichard Henderson int64_t bi = *(int64_t *)(b + i); 968f49b12c6SRichard Henderson int64_t di = ai + bi; 969f49b12c6SRichard Henderson if (((di ^ ai) &~ (ai ^ bi)) < 0) { 970f49b12c6SRichard Henderson /* Signed overflow. */ 971f49b12c6SRichard Henderson di = (di < 0 ? INT64_MAX : INT64_MIN); 972f49b12c6SRichard Henderson } 973f49b12c6SRichard Henderson *(int64_t *)(d + i) = di; 974f49b12c6SRichard Henderson } 975f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 976f49b12c6SRichard Henderson } 977f49b12c6SRichard Henderson 978f49b12c6SRichard Henderson void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc) 979f49b12c6SRichard Henderson { 980f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 981f49b12c6SRichard Henderson intptr_t i; 982f49b12c6SRichard Henderson 983f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 984f49b12c6SRichard Henderson int r = *(int8_t *)(a + i) - *(int8_t *)(b + i); 985f49b12c6SRichard Henderson if (r > INT8_MAX) { 986f49b12c6SRichard Henderson r = INT8_MAX; 987f49b12c6SRichard Henderson } else if (r < INT8_MIN) { 988f49b12c6SRichard Henderson r = INT8_MIN; 989f49b12c6SRichard Henderson } 990f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 991f49b12c6SRichard Henderson } 992f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 993f49b12c6SRichard Henderson } 994f49b12c6SRichard Henderson 995f49b12c6SRichard Henderson void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc) 996f49b12c6SRichard Henderson { 997f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 998f49b12c6SRichard Henderson intptr_t i; 999f49b12c6SRichard Henderson 1000f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1001f49b12c6SRichard Henderson int r = *(int16_t *)(a + i) - *(int16_t *)(b + i); 1002f49b12c6SRichard Henderson if (r > INT16_MAX) { 1003f49b12c6SRichard Henderson r = INT16_MAX; 1004f49b12c6SRichard Henderson } else if (r < INT16_MIN) { 1005f49b12c6SRichard Henderson r = INT16_MIN; 1006f49b12c6SRichard Henderson } 1007f49b12c6SRichard Henderson *(int16_t *)(d + i) = r; 1008f49b12c6SRichard Henderson } 1009f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1010f49b12c6SRichard Henderson } 1011f49b12c6SRichard Henderson 1012f49b12c6SRichard Henderson void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc) 1013f49b12c6SRichard Henderson { 1014f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1015f49b12c6SRichard Henderson intptr_t i; 1016f49b12c6SRichard Henderson 1017f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1018f49b12c6SRichard Henderson int32_t ai = *(int32_t *)(a + i); 1019f49b12c6SRichard Henderson int32_t bi = *(int32_t *)(b + i); 1020f49b12c6SRichard Henderson int32_t di = ai - bi; 1021f49b12c6SRichard Henderson if (((di ^ ai) & (ai ^ bi)) < 0) { 1022f49b12c6SRichard Henderson /* Signed overflow. */ 1023f49b12c6SRichard Henderson di = (di < 0 ? INT32_MAX : INT32_MIN); 1024f49b12c6SRichard Henderson } 1025f49b12c6SRichard Henderson *(int32_t *)(d + i) = di; 1026f49b12c6SRichard Henderson } 1027f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1028f49b12c6SRichard Henderson } 1029f49b12c6SRichard Henderson 1030f49b12c6SRichard Henderson void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc) 1031f49b12c6SRichard Henderson { 1032f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1033f49b12c6SRichard Henderson intptr_t i; 1034f49b12c6SRichard Henderson 1035f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1036f49b12c6SRichard Henderson int64_t ai = *(int64_t *)(a + i); 1037f49b12c6SRichard Henderson int64_t bi = *(int64_t *)(b + i); 1038f49b12c6SRichard Henderson int64_t di = ai - bi; 1039f49b12c6SRichard Henderson if (((di ^ ai) & (ai ^ bi)) < 0) { 1040f49b12c6SRichard Henderson /* Signed overflow. */ 1041f49b12c6SRichard Henderson di = (di < 0 ? INT64_MAX : INT64_MIN); 1042f49b12c6SRichard Henderson } 1043f49b12c6SRichard Henderson *(int64_t *)(d + i) = di; 1044f49b12c6SRichard Henderson } 1045f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1046f49b12c6SRichard Henderson } 1047f49b12c6SRichard Henderson 1048f49b12c6SRichard Henderson void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc) 1049f49b12c6SRichard Henderson { 1050f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1051f49b12c6SRichard Henderson intptr_t i; 1052f49b12c6SRichard Henderson 1053f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1054f49b12c6SRichard Henderson unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 1055f49b12c6SRichard Henderson if (r > UINT8_MAX) { 1056f49b12c6SRichard Henderson r = UINT8_MAX; 1057f49b12c6SRichard Henderson } 1058f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 1059f49b12c6SRichard Henderson } 1060f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1061f49b12c6SRichard Henderson } 1062f49b12c6SRichard Henderson 1063f49b12c6SRichard Henderson void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc) 1064f49b12c6SRichard Henderson { 1065f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1066f49b12c6SRichard Henderson intptr_t i; 1067f49b12c6SRichard Henderson 1068f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1069f49b12c6SRichard Henderson unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 1070f49b12c6SRichard Henderson if (r > UINT16_MAX) { 1071f49b12c6SRichard Henderson r = UINT16_MAX; 1072f49b12c6SRichard Henderson } 1073f49b12c6SRichard Henderson *(uint16_t *)(d + i) = r; 1074f49b12c6SRichard Henderson } 1075f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1076f49b12c6SRichard Henderson } 1077f49b12c6SRichard Henderson 1078f49b12c6SRichard Henderson void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc) 1079f49b12c6SRichard Henderson { 1080f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1081f49b12c6SRichard Henderson intptr_t i; 1082f49b12c6SRichard Henderson 1083f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1084f49b12c6SRichard Henderson uint32_t ai = *(uint32_t *)(a + i); 1085f49b12c6SRichard Henderson uint32_t bi = *(uint32_t *)(b + i); 1086f49b12c6SRichard Henderson uint32_t di = ai + bi; 1087f49b12c6SRichard Henderson if (di < ai) { 1088f49b12c6SRichard Henderson di = UINT32_MAX; 1089f49b12c6SRichard Henderson } 1090f49b12c6SRichard Henderson *(uint32_t *)(d + i) = di; 1091f49b12c6SRichard Henderson } 1092f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1093f49b12c6SRichard Henderson } 1094f49b12c6SRichard Henderson 1095f49b12c6SRichard Henderson void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc) 1096f49b12c6SRichard Henderson { 1097f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1098f49b12c6SRichard Henderson intptr_t i; 1099f49b12c6SRichard Henderson 1100f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1101f49b12c6SRichard Henderson uint64_t ai = *(uint64_t *)(a + i); 1102f49b12c6SRichard Henderson uint64_t bi = *(uint64_t *)(b + i); 1103f49b12c6SRichard Henderson uint64_t di = ai + bi; 1104f49b12c6SRichard Henderson if (di < ai) { 1105f49b12c6SRichard Henderson di = UINT64_MAX; 1106f49b12c6SRichard Henderson } 1107f49b12c6SRichard Henderson *(uint64_t *)(d + i) = di; 1108f49b12c6SRichard Henderson } 1109f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1110f49b12c6SRichard Henderson } 1111f49b12c6SRichard Henderson 1112f49b12c6SRichard Henderson void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc) 1113f49b12c6SRichard Henderson { 1114f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1115f49b12c6SRichard Henderson intptr_t i; 1116f49b12c6SRichard Henderson 1117f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1118f49b12c6SRichard Henderson int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 1119f49b12c6SRichard Henderson if (r < 0) { 1120f49b12c6SRichard Henderson r = 0; 1121f49b12c6SRichard Henderson } 1122f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 1123f49b12c6SRichard Henderson } 1124f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1125f49b12c6SRichard Henderson } 1126f49b12c6SRichard Henderson 1127f49b12c6SRichard Henderson void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc) 1128f49b12c6SRichard Henderson { 1129f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1130f49b12c6SRichard Henderson intptr_t i; 1131f49b12c6SRichard Henderson 1132f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1133f49b12c6SRichard Henderson int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 1134f49b12c6SRichard Henderson if (r < 0) { 1135f49b12c6SRichard Henderson r = 0; 1136f49b12c6SRichard Henderson } 1137f49b12c6SRichard Henderson *(uint16_t *)(d + i) = r; 1138f49b12c6SRichard Henderson } 1139f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1140f49b12c6SRichard Henderson } 1141f49b12c6SRichard Henderson 1142f49b12c6SRichard Henderson void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc) 1143f49b12c6SRichard Henderson { 1144f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1145f49b12c6SRichard Henderson intptr_t i; 1146f49b12c6SRichard Henderson 1147f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1148f49b12c6SRichard Henderson uint32_t ai = *(uint32_t *)(a + i); 1149f49b12c6SRichard Henderson uint32_t bi = *(uint32_t *)(b + i); 1150f49b12c6SRichard Henderson uint32_t di = ai - bi; 1151f49b12c6SRichard Henderson if (ai < bi) { 1152f49b12c6SRichard Henderson di = 0; 1153f49b12c6SRichard Henderson } 1154f49b12c6SRichard Henderson *(uint32_t *)(d + i) = di; 1155f49b12c6SRichard Henderson } 1156f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1157f49b12c6SRichard Henderson } 1158f49b12c6SRichard Henderson 1159f49b12c6SRichard Henderson void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc) 1160f49b12c6SRichard Henderson { 1161f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1162f49b12c6SRichard Henderson intptr_t i; 1163f49b12c6SRichard Henderson 1164f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1165f49b12c6SRichard Henderson uint64_t ai = *(uint64_t *)(a + i); 1166f49b12c6SRichard Henderson uint64_t bi = *(uint64_t *)(b + i); 1167f49b12c6SRichard Henderson uint64_t di = ai - bi; 1168f49b12c6SRichard Henderson if (ai < bi) { 1169f49b12c6SRichard Henderson di = 0; 1170f49b12c6SRichard Henderson } 1171f49b12c6SRichard Henderson *(uint64_t *)(d + i) = di; 1172f49b12c6SRichard Henderson } 1173f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1174f49b12c6SRichard Henderson } 1175dd0a0fcdSRichard Henderson 1176dd0a0fcdSRichard Henderson void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc) 1177dd0a0fcdSRichard Henderson { 1178dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1179dd0a0fcdSRichard Henderson intptr_t i; 1180dd0a0fcdSRichard Henderson 1181dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1182dd0a0fcdSRichard Henderson int8_t aa = *(int8_t *)(a + i); 1183dd0a0fcdSRichard Henderson int8_t bb = *(int8_t *)(b + i); 1184dd0a0fcdSRichard Henderson int8_t dd = aa < bb ? aa : bb; 1185dd0a0fcdSRichard Henderson *(int8_t *)(d + i) = dd; 1186dd0a0fcdSRichard Henderson } 1187dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1188dd0a0fcdSRichard Henderson } 1189dd0a0fcdSRichard Henderson 1190dd0a0fcdSRichard Henderson void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc) 1191dd0a0fcdSRichard Henderson { 1192dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1193dd0a0fcdSRichard Henderson intptr_t i; 1194dd0a0fcdSRichard Henderson 1195dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1196dd0a0fcdSRichard Henderson int16_t aa = *(int16_t *)(a + i); 1197dd0a0fcdSRichard Henderson int16_t bb = *(int16_t *)(b + i); 1198dd0a0fcdSRichard Henderson int16_t dd = aa < bb ? aa : bb; 1199dd0a0fcdSRichard Henderson *(int16_t *)(d + i) = dd; 1200dd0a0fcdSRichard Henderson } 1201dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1202dd0a0fcdSRichard Henderson } 1203dd0a0fcdSRichard Henderson 1204dd0a0fcdSRichard Henderson void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc) 1205dd0a0fcdSRichard Henderson { 1206dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1207dd0a0fcdSRichard Henderson intptr_t i; 1208dd0a0fcdSRichard Henderson 1209dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1210dd0a0fcdSRichard Henderson int32_t aa = *(int32_t *)(a + i); 1211dd0a0fcdSRichard Henderson int32_t bb = *(int32_t *)(b + i); 1212dd0a0fcdSRichard Henderson int32_t dd = aa < bb ? aa : bb; 1213dd0a0fcdSRichard Henderson *(int32_t *)(d + i) = dd; 1214dd0a0fcdSRichard Henderson } 1215dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1216dd0a0fcdSRichard Henderson } 1217dd0a0fcdSRichard Henderson 1218dd0a0fcdSRichard Henderson void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc) 1219dd0a0fcdSRichard Henderson { 1220dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1221dd0a0fcdSRichard Henderson intptr_t i; 1222dd0a0fcdSRichard Henderson 1223dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1224dd0a0fcdSRichard Henderson int64_t aa = *(int64_t *)(a + i); 1225dd0a0fcdSRichard Henderson int64_t bb = *(int64_t *)(b + i); 1226dd0a0fcdSRichard Henderson int64_t dd = aa < bb ? aa : bb; 1227dd0a0fcdSRichard Henderson *(int64_t *)(d + i) = dd; 1228dd0a0fcdSRichard Henderson } 1229dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1230dd0a0fcdSRichard Henderson } 1231dd0a0fcdSRichard Henderson 1232dd0a0fcdSRichard Henderson void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc) 1233dd0a0fcdSRichard Henderson { 1234dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1235dd0a0fcdSRichard Henderson intptr_t i; 1236dd0a0fcdSRichard Henderson 1237dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1238dd0a0fcdSRichard Henderson int8_t aa = *(int8_t *)(a + i); 1239dd0a0fcdSRichard Henderson int8_t bb = *(int8_t *)(b + i); 1240dd0a0fcdSRichard Henderson int8_t dd = aa > bb ? aa : bb; 1241dd0a0fcdSRichard Henderson *(int8_t *)(d + i) = dd; 1242dd0a0fcdSRichard Henderson } 1243dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1244dd0a0fcdSRichard Henderson } 1245dd0a0fcdSRichard Henderson 1246dd0a0fcdSRichard Henderson void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc) 1247dd0a0fcdSRichard Henderson { 1248dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1249dd0a0fcdSRichard Henderson intptr_t i; 1250dd0a0fcdSRichard Henderson 1251dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1252dd0a0fcdSRichard Henderson int16_t aa = *(int16_t *)(a + i); 1253dd0a0fcdSRichard Henderson int16_t bb = *(int16_t *)(b + i); 1254dd0a0fcdSRichard Henderson int16_t dd = aa > bb ? aa : bb; 1255dd0a0fcdSRichard Henderson *(int16_t *)(d + i) = dd; 1256dd0a0fcdSRichard Henderson } 1257dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1258dd0a0fcdSRichard Henderson } 1259dd0a0fcdSRichard Henderson 1260dd0a0fcdSRichard Henderson void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc) 1261dd0a0fcdSRichard Henderson { 1262dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1263dd0a0fcdSRichard Henderson intptr_t i; 1264dd0a0fcdSRichard Henderson 1265dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1266dd0a0fcdSRichard Henderson int32_t aa = *(int32_t *)(a + i); 1267dd0a0fcdSRichard Henderson int32_t bb = *(int32_t *)(b + i); 1268dd0a0fcdSRichard Henderson int32_t dd = aa > bb ? aa : bb; 1269dd0a0fcdSRichard Henderson *(int32_t *)(d + i) = dd; 1270dd0a0fcdSRichard Henderson } 1271dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1272dd0a0fcdSRichard Henderson } 1273dd0a0fcdSRichard Henderson 1274dd0a0fcdSRichard Henderson void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc) 1275dd0a0fcdSRichard Henderson { 1276dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1277dd0a0fcdSRichard Henderson intptr_t i; 1278dd0a0fcdSRichard Henderson 1279dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1280dd0a0fcdSRichard Henderson int64_t aa = *(int64_t *)(a + i); 1281dd0a0fcdSRichard Henderson int64_t bb = *(int64_t *)(b + i); 1282dd0a0fcdSRichard Henderson int64_t dd = aa > bb ? aa : bb; 1283dd0a0fcdSRichard Henderson *(int64_t *)(d + i) = dd; 1284dd0a0fcdSRichard Henderson } 1285dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1286dd0a0fcdSRichard Henderson } 1287dd0a0fcdSRichard Henderson 1288dd0a0fcdSRichard Henderson void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc) 1289dd0a0fcdSRichard Henderson { 1290dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1291dd0a0fcdSRichard Henderson intptr_t i; 1292dd0a0fcdSRichard Henderson 1293dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1294dd0a0fcdSRichard Henderson uint8_t aa = *(uint8_t *)(a + i); 1295dd0a0fcdSRichard Henderson uint8_t bb = *(uint8_t *)(b + i); 1296dd0a0fcdSRichard Henderson uint8_t dd = aa < bb ? aa : bb; 1297dd0a0fcdSRichard Henderson *(uint8_t *)(d + i) = dd; 1298dd0a0fcdSRichard Henderson } 1299dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1300dd0a0fcdSRichard Henderson } 1301dd0a0fcdSRichard Henderson 1302dd0a0fcdSRichard Henderson void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc) 1303dd0a0fcdSRichard Henderson { 1304dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1305dd0a0fcdSRichard Henderson intptr_t i; 1306dd0a0fcdSRichard Henderson 1307dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1308dd0a0fcdSRichard Henderson uint16_t aa = *(uint16_t *)(a + i); 1309dd0a0fcdSRichard Henderson uint16_t bb = *(uint16_t *)(b + i); 1310dd0a0fcdSRichard Henderson uint16_t dd = aa < bb ? aa : bb; 1311dd0a0fcdSRichard Henderson *(uint16_t *)(d + i) = dd; 1312dd0a0fcdSRichard Henderson } 1313dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1314dd0a0fcdSRichard Henderson } 1315dd0a0fcdSRichard Henderson 1316dd0a0fcdSRichard Henderson void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc) 1317dd0a0fcdSRichard Henderson { 1318dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1319dd0a0fcdSRichard Henderson intptr_t i; 1320dd0a0fcdSRichard Henderson 1321dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1322dd0a0fcdSRichard Henderson uint32_t aa = *(uint32_t *)(a + i); 1323dd0a0fcdSRichard Henderson uint32_t bb = *(uint32_t *)(b + i); 1324dd0a0fcdSRichard Henderson uint32_t dd = aa < bb ? aa : bb; 1325dd0a0fcdSRichard Henderson *(uint32_t *)(d + i) = dd; 1326dd0a0fcdSRichard Henderson } 1327dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1328dd0a0fcdSRichard Henderson } 1329dd0a0fcdSRichard Henderson 1330dd0a0fcdSRichard Henderson void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc) 1331dd0a0fcdSRichard Henderson { 1332dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1333dd0a0fcdSRichard Henderson intptr_t i; 1334dd0a0fcdSRichard Henderson 1335dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1336dd0a0fcdSRichard Henderson uint64_t aa = *(uint64_t *)(a + i); 1337dd0a0fcdSRichard Henderson uint64_t bb = *(uint64_t *)(b + i); 1338dd0a0fcdSRichard Henderson uint64_t dd = aa < bb ? aa : bb; 1339dd0a0fcdSRichard Henderson *(uint64_t *)(d + i) = dd; 1340dd0a0fcdSRichard Henderson } 1341dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1342dd0a0fcdSRichard Henderson } 1343dd0a0fcdSRichard Henderson 1344dd0a0fcdSRichard Henderson void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc) 1345dd0a0fcdSRichard Henderson { 1346dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1347dd0a0fcdSRichard Henderson intptr_t i; 1348dd0a0fcdSRichard Henderson 1349dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1350dd0a0fcdSRichard Henderson uint8_t aa = *(uint8_t *)(a + i); 1351dd0a0fcdSRichard Henderson uint8_t bb = *(uint8_t *)(b + i); 1352dd0a0fcdSRichard Henderson uint8_t dd = aa > bb ? aa : bb; 1353dd0a0fcdSRichard Henderson *(uint8_t *)(d + i) = dd; 1354dd0a0fcdSRichard Henderson } 1355dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1356dd0a0fcdSRichard Henderson } 1357dd0a0fcdSRichard Henderson 1358dd0a0fcdSRichard Henderson void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc) 1359dd0a0fcdSRichard Henderson { 1360dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1361dd0a0fcdSRichard Henderson intptr_t i; 1362dd0a0fcdSRichard Henderson 1363dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1364dd0a0fcdSRichard Henderson uint16_t aa = *(uint16_t *)(a + i); 1365dd0a0fcdSRichard Henderson uint16_t bb = *(uint16_t *)(b + i); 1366dd0a0fcdSRichard Henderson uint16_t dd = aa > bb ? aa : bb; 1367dd0a0fcdSRichard Henderson *(uint16_t *)(d + i) = dd; 1368dd0a0fcdSRichard Henderson } 1369dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1370dd0a0fcdSRichard Henderson } 1371dd0a0fcdSRichard Henderson 1372dd0a0fcdSRichard Henderson void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc) 1373dd0a0fcdSRichard Henderson { 1374dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1375dd0a0fcdSRichard Henderson intptr_t i; 1376dd0a0fcdSRichard Henderson 1377dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1378dd0a0fcdSRichard Henderson uint32_t aa = *(uint32_t *)(a + i); 1379dd0a0fcdSRichard Henderson uint32_t bb = *(uint32_t *)(b + i); 1380dd0a0fcdSRichard Henderson uint32_t dd = aa > bb ? aa : bb; 1381dd0a0fcdSRichard Henderson *(uint32_t *)(d + i) = dd; 1382dd0a0fcdSRichard Henderson } 1383dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1384dd0a0fcdSRichard Henderson } 1385dd0a0fcdSRichard Henderson 1386dd0a0fcdSRichard Henderson void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc) 1387dd0a0fcdSRichard Henderson { 1388dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1389dd0a0fcdSRichard Henderson intptr_t i; 1390dd0a0fcdSRichard Henderson 1391dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1392dd0a0fcdSRichard Henderson uint64_t aa = *(uint64_t *)(a + i); 1393dd0a0fcdSRichard Henderson uint64_t bb = *(uint64_t *)(b + i); 1394dd0a0fcdSRichard Henderson uint64_t dd = aa > bb ? aa : bb; 1395dd0a0fcdSRichard Henderson *(uint64_t *)(d + i) = dd; 1396dd0a0fcdSRichard Henderson } 1397dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1398dd0a0fcdSRichard Henderson } 1399