1db432672SRichard Henderson /* 2db432672SRichard Henderson * Generic vectorized operation runtime 3db432672SRichard Henderson * 4db432672SRichard Henderson * Copyright (c) 2018 Linaro 5db432672SRichard Henderson * 6db432672SRichard Henderson * This library is free software; you can redistribute it and/or 7db432672SRichard Henderson * modify it under the terms of the GNU Lesser General Public 8db432672SRichard Henderson * License as published by the Free Software Foundation; either 9fb0343d5SThomas Huth * version 2.1 of the License, or (at your option) any later version. 10db432672SRichard Henderson * 11db432672SRichard Henderson * This library is distributed in the hope that it will be useful, 12db432672SRichard Henderson * but WITHOUT ANY WARRANTY; without even the implied warranty of 13db432672SRichard Henderson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14db432672SRichard Henderson * Lesser General Public License for more details. 15db432672SRichard Henderson * 16db432672SRichard Henderson * You should have received a copy of the GNU Lesser General Public 17db432672SRichard Henderson * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18db432672SRichard Henderson */ 19db432672SRichard Henderson 20db432672SRichard Henderson #include "qemu/osdep.h" 21db432672SRichard Henderson #include "qemu/host-utils.h" 22db432672SRichard Henderson #include "cpu.h" 23db432672SRichard Henderson #include "exec/helper-proto.h" 24dcb32f1dSPhilippe Mathieu-Daudé #include "tcg/tcg-gvec-desc.h" 25db432672SRichard Henderson 26db432672SRichard Henderson 27db432672SRichard Henderson static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc) 28db432672SRichard Henderson { 29db432672SRichard Henderson intptr_t maxsz = simd_maxsz(desc); 30db432672SRichard Henderson intptr_t i; 31db432672SRichard Henderson 32db432672SRichard Henderson if (unlikely(maxsz > oprsz)) { 33db432672SRichard Henderson for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) { 34db432672SRichard Henderson *(uint64_t *)(d + i) = 0; 35db432672SRichard Henderson } 36db432672SRichard Henderson } 37db432672SRichard Henderson } 38db432672SRichard Henderson 39db432672SRichard Henderson void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc) 40db432672SRichard Henderson { 41db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 42db432672SRichard Henderson intptr_t i; 43db432672SRichard Henderson 446c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 456c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 46db432672SRichard Henderson } 47db432672SRichard Henderson clear_high(d, oprsz, desc); 48db432672SRichard Henderson } 49db432672SRichard Henderson 50db432672SRichard Henderson void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc) 51db432672SRichard Henderson { 52db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 53db432672SRichard Henderson intptr_t i; 54db432672SRichard Henderson 556c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 566c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 57db432672SRichard Henderson } 58db432672SRichard Henderson clear_high(d, oprsz, desc); 59db432672SRichard Henderson } 60db432672SRichard Henderson 61db432672SRichard Henderson void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc) 62db432672SRichard Henderson { 63db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 64db432672SRichard Henderson intptr_t i; 65db432672SRichard Henderson 666c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 676c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i); 68db432672SRichard Henderson } 69db432672SRichard Henderson clear_high(d, oprsz, desc); 70db432672SRichard Henderson } 71db432672SRichard Henderson 72db432672SRichard Henderson void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc) 73db432672SRichard Henderson { 74db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 75db432672SRichard Henderson intptr_t i; 76db432672SRichard Henderson 776c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 786c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i); 79db432672SRichard Henderson } 80db432672SRichard Henderson clear_high(d, oprsz, desc); 81db432672SRichard Henderson } 82db432672SRichard Henderson 8322fc3527SRichard Henderson void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc) 8422fc3527SRichard Henderson { 8522fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8622fc3527SRichard Henderson intptr_t i; 8722fc3527SRichard Henderson 886c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 890a83e43aSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b; 9022fc3527SRichard Henderson } 9122fc3527SRichard Henderson clear_high(d, oprsz, desc); 9222fc3527SRichard Henderson } 9322fc3527SRichard Henderson 9422fc3527SRichard Henderson void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc) 9522fc3527SRichard Henderson { 9622fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 9722fc3527SRichard Henderson intptr_t i; 9822fc3527SRichard Henderson 996c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1000a83e43aSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b; 10122fc3527SRichard Henderson } 10222fc3527SRichard Henderson clear_high(d, oprsz, desc); 10322fc3527SRichard Henderson } 10422fc3527SRichard Henderson 10522fc3527SRichard Henderson void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc) 10622fc3527SRichard Henderson { 10722fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 10822fc3527SRichard Henderson intptr_t i; 10922fc3527SRichard Henderson 1106c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1110a83e43aSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b; 11222fc3527SRichard Henderson } 11322fc3527SRichard Henderson clear_high(d, oprsz, desc); 11422fc3527SRichard Henderson } 11522fc3527SRichard Henderson 11622fc3527SRichard Henderson void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc) 11722fc3527SRichard Henderson { 11822fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 11922fc3527SRichard Henderson intptr_t i; 12022fc3527SRichard Henderson 1216c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1220a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b; 12322fc3527SRichard Henderson } 12422fc3527SRichard Henderson clear_high(d, oprsz, desc); 12522fc3527SRichard Henderson } 12622fc3527SRichard Henderson 127db432672SRichard Henderson void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc) 128db432672SRichard Henderson { 129db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 130db432672SRichard Henderson intptr_t i; 131db432672SRichard Henderson 1326c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1336c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 134db432672SRichard Henderson } 135db432672SRichard Henderson clear_high(d, oprsz, desc); 136db432672SRichard Henderson } 137db432672SRichard Henderson 138db432672SRichard Henderson void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc) 139db432672SRichard Henderson { 140db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 141db432672SRichard Henderson intptr_t i; 142db432672SRichard Henderson 1436c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1446c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 145db432672SRichard Henderson } 146db432672SRichard Henderson clear_high(d, oprsz, desc); 147db432672SRichard Henderson } 148db432672SRichard Henderson 149db432672SRichard Henderson void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc) 150db432672SRichard Henderson { 151db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 152db432672SRichard Henderson intptr_t i; 153db432672SRichard Henderson 1546c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1556c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i); 156db432672SRichard Henderson } 157db432672SRichard Henderson clear_high(d, oprsz, desc); 158db432672SRichard Henderson } 159db432672SRichard Henderson 160db432672SRichard Henderson void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc) 161db432672SRichard Henderson { 162db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 163db432672SRichard Henderson intptr_t i; 164db432672SRichard Henderson 1656c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1666c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i); 167db432672SRichard Henderson } 168db432672SRichard Henderson clear_high(d, oprsz, desc); 169db432672SRichard Henderson } 170db432672SRichard Henderson 17122fc3527SRichard Henderson void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc) 17222fc3527SRichard Henderson { 17322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 17422fc3527SRichard Henderson intptr_t i; 17522fc3527SRichard Henderson 1766c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1770a83e43aSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b; 17822fc3527SRichard Henderson } 17922fc3527SRichard Henderson clear_high(d, oprsz, desc); 18022fc3527SRichard Henderson } 18122fc3527SRichard Henderson 18222fc3527SRichard Henderson void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc) 18322fc3527SRichard Henderson { 18422fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 18522fc3527SRichard Henderson intptr_t i; 18622fc3527SRichard Henderson 1876c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1880a83e43aSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b; 18922fc3527SRichard Henderson } 19022fc3527SRichard Henderson clear_high(d, oprsz, desc); 19122fc3527SRichard Henderson } 19222fc3527SRichard Henderson 19322fc3527SRichard Henderson void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc) 19422fc3527SRichard Henderson { 19522fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 19622fc3527SRichard Henderson intptr_t i; 19722fc3527SRichard Henderson 1986c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1990a83e43aSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b; 20022fc3527SRichard Henderson } 20122fc3527SRichard Henderson clear_high(d, oprsz, desc); 20222fc3527SRichard Henderson } 20322fc3527SRichard Henderson 20422fc3527SRichard Henderson void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc) 20522fc3527SRichard Henderson { 20622fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 20722fc3527SRichard Henderson intptr_t i; 20822fc3527SRichard Henderson 2096c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 2100a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b; 21122fc3527SRichard Henderson } 21222fc3527SRichard Henderson clear_high(d, oprsz, desc); 21322fc3527SRichard Henderson } 21422fc3527SRichard Henderson 2153774030aSRichard Henderson void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc) 2163774030aSRichard Henderson { 2173774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2183774030aSRichard Henderson intptr_t i; 2193774030aSRichard Henderson 2206c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 2216c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i); 2223774030aSRichard Henderson } 2233774030aSRichard Henderson clear_high(d, oprsz, desc); 2243774030aSRichard Henderson } 2253774030aSRichard Henderson 2263774030aSRichard Henderson void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc) 2273774030aSRichard Henderson { 2283774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2293774030aSRichard Henderson intptr_t i; 2303774030aSRichard Henderson 2316c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 2326c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i); 2333774030aSRichard Henderson } 2343774030aSRichard Henderson clear_high(d, oprsz, desc); 2353774030aSRichard Henderson } 2363774030aSRichard Henderson 2373774030aSRichard Henderson void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc) 2383774030aSRichard Henderson { 2393774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2403774030aSRichard Henderson intptr_t i; 2413774030aSRichard Henderson 2426c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 2436c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i); 2443774030aSRichard Henderson } 2453774030aSRichard Henderson clear_high(d, oprsz, desc); 2463774030aSRichard Henderson } 2473774030aSRichard Henderson 2483774030aSRichard Henderson void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc) 2493774030aSRichard Henderson { 2503774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2513774030aSRichard Henderson intptr_t i; 2523774030aSRichard Henderson 2536c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 2546c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i); 2553774030aSRichard Henderson } 2563774030aSRichard Henderson clear_high(d, oprsz, desc); 2573774030aSRichard Henderson } 2583774030aSRichard Henderson 25922fc3527SRichard Henderson void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc) 26022fc3527SRichard Henderson { 26122fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 26222fc3527SRichard Henderson intptr_t i; 26322fc3527SRichard Henderson 2646c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 2650a83e43aSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b; 26622fc3527SRichard Henderson } 26722fc3527SRichard Henderson clear_high(d, oprsz, desc); 26822fc3527SRichard Henderson } 26922fc3527SRichard Henderson 27022fc3527SRichard Henderson void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc) 27122fc3527SRichard Henderson { 27222fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 27322fc3527SRichard Henderson intptr_t i; 27422fc3527SRichard Henderson 2756c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 2760a83e43aSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b; 27722fc3527SRichard Henderson } 27822fc3527SRichard Henderson clear_high(d, oprsz, desc); 27922fc3527SRichard Henderson } 28022fc3527SRichard Henderson 28122fc3527SRichard Henderson void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc) 28222fc3527SRichard Henderson { 28322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 28422fc3527SRichard Henderson intptr_t i; 28522fc3527SRichard Henderson 2866c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 2870a83e43aSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b; 28822fc3527SRichard Henderson } 28922fc3527SRichard Henderson clear_high(d, oprsz, desc); 29022fc3527SRichard Henderson } 29122fc3527SRichard Henderson 29222fc3527SRichard Henderson void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc) 29322fc3527SRichard Henderson { 29422fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 29522fc3527SRichard Henderson intptr_t i; 29622fc3527SRichard Henderson 2976c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 2980a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b; 29922fc3527SRichard Henderson } 30022fc3527SRichard Henderson clear_high(d, oprsz, desc); 30122fc3527SRichard Henderson } 30222fc3527SRichard Henderson 303db432672SRichard Henderson void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) 304db432672SRichard Henderson { 305db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 306db432672SRichard Henderson intptr_t i; 307db432672SRichard Henderson 3086c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 3096c7ab301SRichard Henderson *(uint8_t *)(d + i) = -*(uint8_t *)(a + i); 310db432672SRichard Henderson } 311db432672SRichard Henderson clear_high(d, oprsz, desc); 312db432672SRichard Henderson } 313db432672SRichard Henderson 314db432672SRichard Henderson void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) 315db432672SRichard Henderson { 316db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 317db432672SRichard Henderson intptr_t i; 318db432672SRichard Henderson 3196c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 3206c7ab301SRichard Henderson *(uint16_t *)(d + i) = -*(uint16_t *)(a + i); 321db432672SRichard Henderson } 322db432672SRichard Henderson clear_high(d, oprsz, desc); 323db432672SRichard Henderson } 324db432672SRichard Henderson 325db432672SRichard Henderson void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) 326db432672SRichard Henderson { 327db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 328db432672SRichard Henderson intptr_t i; 329db432672SRichard Henderson 3306c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 3316c7ab301SRichard Henderson *(uint32_t *)(d + i) = -*(uint32_t *)(a + i); 332db432672SRichard Henderson } 333db432672SRichard Henderson clear_high(d, oprsz, desc); 334db432672SRichard Henderson } 335db432672SRichard Henderson 336db432672SRichard Henderson void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) 337db432672SRichard Henderson { 338db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 339db432672SRichard Henderson intptr_t i; 340db432672SRichard Henderson 3416c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 3426c7ab301SRichard Henderson *(uint64_t *)(d + i) = -*(uint64_t *)(a + i); 343db432672SRichard Henderson } 344db432672SRichard Henderson clear_high(d, oprsz, desc); 345db432672SRichard Henderson } 346db432672SRichard Henderson 347bcefc902SRichard Henderson void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc) 348bcefc902SRichard Henderson { 349bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 350bcefc902SRichard Henderson intptr_t i; 351bcefc902SRichard Henderson 352bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 353bcefc902SRichard Henderson int8_t aa = *(int8_t *)(a + i); 354bcefc902SRichard Henderson *(int8_t *)(d + i) = aa < 0 ? -aa : aa; 355bcefc902SRichard Henderson } 356bcefc902SRichard Henderson clear_high(d, oprsz, desc); 357bcefc902SRichard Henderson } 358bcefc902SRichard Henderson 359bcefc902SRichard Henderson void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc) 360bcefc902SRichard Henderson { 361bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 362bcefc902SRichard Henderson intptr_t i; 363bcefc902SRichard Henderson 364bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 365bcefc902SRichard Henderson int16_t aa = *(int16_t *)(a + i); 366bcefc902SRichard Henderson *(int16_t *)(d + i) = aa < 0 ? -aa : aa; 367bcefc902SRichard Henderson } 368bcefc902SRichard Henderson clear_high(d, oprsz, desc); 369bcefc902SRichard Henderson } 370bcefc902SRichard Henderson 371bcefc902SRichard Henderson void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc) 372bcefc902SRichard Henderson { 373bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 374bcefc902SRichard Henderson intptr_t i; 375bcefc902SRichard Henderson 376bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 377bcefc902SRichard Henderson int32_t aa = *(int32_t *)(a + i); 378bcefc902SRichard Henderson *(int32_t *)(d + i) = aa < 0 ? -aa : aa; 379bcefc902SRichard Henderson } 380bcefc902SRichard Henderson clear_high(d, oprsz, desc); 381bcefc902SRichard Henderson } 382bcefc902SRichard Henderson 383bcefc902SRichard Henderson void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc) 384bcefc902SRichard Henderson { 385bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 386bcefc902SRichard Henderson intptr_t i; 387bcefc902SRichard Henderson 388bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 389bcefc902SRichard Henderson int64_t aa = *(int64_t *)(a + i); 390bcefc902SRichard Henderson *(int64_t *)(d + i) = aa < 0 ? -aa : aa; 391bcefc902SRichard Henderson } 392bcefc902SRichard Henderson clear_high(d, oprsz, desc); 393bcefc902SRichard Henderson } 394bcefc902SRichard Henderson 395db432672SRichard Henderson void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) 396db432672SRichard Henderson { 397db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 398db432672SRichard Henderson 399db432672SRichard Henderson memcpy(d, a, oprsz); 400db432672SRichard Henderson clear_high(d, oprsz, desc); 401db432672SRichard Henderson } 402db432672SRichard Henderson 403db432672SRichard Henderson void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) 404db432672SRichard Henderson { 405db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 406db432672SRichard Henderson intptr_t i; 407db432672SRichard Henderson 408db432672SRichard Henderson if (c == 0) { 409db432672SRichard Henderson oprsz = 0; 410db432672SRichard Henderson } else { 411db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 412db432672SRichard Henderson *(uint64_t *)(d + i) = c; 413db432672SRichard Henderson } 414db432672SRichard Henderson } 415db432672SRichard Henderson clear_high(d, oprsz, desc); 416db432672SRichard Henderson } 417db432672SRichard Henderson 418db432672SRichard Henderson void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) 419db432672SRichard Henderson { 420db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 421db432672SRichard Henderson intptr_t i; 422db432672SRichard Henderson 423db432672SRichard Henderson if (c == 0) { 424db432672SRichard Henderson oprsz = 0; 425db432672SRichard Henderson } else { 426db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 427db432672SRichard Henderson *(uint32_t *)(d + i) = c; 428db432672SRichard Henderson } 429db432672SRichard Henderson } 430db432672SRichard Henderson clear_high(d, oprsz, desc); 431db432672SRichard Henderson } 432db432672SRichard Henderson 433db432672SRichard Henderson void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) 434db432672SRichard Henderson { 435db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); 436db432672SRichard Henderson } 437db432672SRichard Henderson 438db432672SRichard Henderson void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) 439db432672SRichard Henderson { 440db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); 441db432672SRichard Henderson } 442db432672SRichard Henderson 443db432672SRichard Henderson void HELPER(gvec_not)(void *d, void *a, uint32_t desc) 444db432672SRichard Henderson { 445db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 446db432672SRichard Henderson intptr_t i; 447db432672SRichard Henderson 4486c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 4496c7ab301SRichard Henderson *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i); 450db432672SRichard Henderson } 451db432672SRichard Henderson clear_high(d, oprsz, desc); 452db432672SRichard Henderson } 453db432672SRichard Henderson 454db432672SRichard Henderson void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) 455db432672SRichard Henderson { 456db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 457db432672SRichard Henderson intptr_t i; 458db432672SRichard Henderson 4596c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 4606c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i); 461db432672SRichard Henderson } 462db432672SRichard Henderson clear_high(d, oprsz, desc); 463db432672SRichard Henderson } 464db432672SRichard Henderson 465db432672SRichard Henderson void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) 466db432672SRichard Henderson { 467db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 468db432672SRichard Henderson intptr_t i; 469db432672SRichard Henderson 4706c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 4716c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i); 472db432672SRichard Henderson } 473db432672SRichard Henderson clear_high(d, oprsz, desc); 474db432672SRichard Henderson } 475db432672SRichard Henderson 476db432672SRichard Henderson void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) 477db432672SRichard Henderson { 478db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 479db432672SRichard Henderson intptr_t i; 480db432672SRichard Henderson 4816c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 4826c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i); 483db432672SRichard Henderson } 484db432672SRichard Henderson clear_high(d, oprsz, desc); 485db432672SRichard Henderson } 486db432672SRichard Henderson 487db432672SRichard Henderson void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) 488db432672SRichard Henderson { 489db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 490db432672SRichard Henderson intptr_t i; 491db432672SRichard Henderson 4926c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 4936c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i); 494db432672SRichard Henderson } 495db432672SRichard Henderson clear_high(d, oprsz, desc); 496db432672SRichard Henderson } 497db432672SRichard Henderson 498db432672SRichard Henderson void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) 499db432672SRichard Henderson { 500db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 501db432672SRichard Henderson intptr_t i; 502db432672SRichard Henderson 5036c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 5046c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i); 505db432672SRichard Henderson } 506db432672SRichard Henderson clear_high(d, oprsz, desc); 507db432672SRichard Henderson } 508d0ec9796SRichard Henderson 509f550805dSRichard Henderson void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc) 510f550805dSRichard Henderson { 511f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 512f550805dSRichard Henderson intptr_t i; 513f550805dSRichard Henderson 5146c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 5156c7ab301SRichard Henderson *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i)); 516f550805dSRichard Henderson } 517f550805dSRichard Henderson clear_high(d, oprsz, desc); 518f550805dSRichard Henderson } 519f550805dSRichard Henderson 520f550805dSRichard Henderson void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc) 521f550805dSRichard Henderson { 522f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 523f550805dSRichard Henderson intptr_t i; 524f550805dSRichard Henderson 5256c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 5266c7ab301SRichard Henderson *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i)); 527f550805dSRichard Henderson } 528f550805dSRichard Henderson clear_high(d, oprsz, desc); 529f550805dSRichard Henderson } 530f550805dSRichard Henderson 531f550805dSRichard Henderson void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc) 532f550805dSRichard Henderson { 533f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 534f550805dSRichard Henderson intptr_t i; 535f550805dSRichard Henderson 5366c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 5376c7ab301SRichard Henderson *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i)); 538f550805dSRichard Henderson } 539f550805dSRichard Henderson clear_high(d, oprsz, desc); 540f550805dSRichard Henderson } 541f550805dSRichard Henderson 54222fc3527SRichard Henderson void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc) 54322fc3527SRichard Henderson { 54422fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 54522fc3527SRichard Henderson intptr_t i; 54622fc3527SRichard Henderson 5476c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 5480a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b; 54922fc3527SRichard Henderson } 55022fc3527SRichard Henderson clear_high(d, oprsz, desc); 55122fc3527SRichard Henderson } 55222fc3527SRichard Henderson 553*4221aa4aSNazar Kazakov void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc) 554*4221aa4aSNazar Kazakov { 555*4221aa4aSNazar Kazakov intptr_t oprsz = simd_oprsz(desc); 556*4221aa4aSNazar Kazakov intptr_t i; 557*4221aa4aSNazar Kazakov 558*4221aa4aSNazar Kazakov for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 559*4221aa4aSNazar Kazakov *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b; 560*4221aa4aSNazar Kazakov } 561*4221aa4aSNazar Kazakov clear_high(d, oprsz, desc); 562*4221aa4aSNazar Kazakov } 563*4221aa4aSNazar Kazakov 56422fc3527SRichard Henderson void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc) 56522fc3527SRichard Henderson { 56622fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 56722fc3527SRichard Henderson intptr_t i; 56822fc3527SRichard Henderson 5696c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 5700a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b; 57122fc3527SRichard Henderson } 57222fc3527SRichard Henderson clear_high(d, oprsz, desc); 57322fc3527SRichard Henderson } 57422fc3527SRichard Henderson 57522fc3527SRichard Henderson void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc) 57622fc3527SRichard Henderson { 57722fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 57822fc3527SRichard Henderson intptr_t i; 57922fc3527SRichard Henderson 5806c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 5810a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b; 58222fc3527SRichard Henderson } 58322fc3527SRichard Henderson clear_high(d, oprsz, desc); 58422fc3527SRichard Henderson } 58522fc3527SRichard Henderson 586d0ec9796SRichard Henderson void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) 587d0ec9796SRichard Henderson { 588d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 589d0ec9796SRichard Henderson int shift = simd_data(desc); 590d0ec9796SRichard Henderson intptr_t i; 591d0ec9796SRichard Henderson 5926c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 5936c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift; 594d0ec9796SRichard Henderson } 595d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 596d0ec9796SRichard Henderson } 597d0ec9796SRichard Henderson 598d0ec9796SRichard Henderson void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) 599d0ec9796SRichard Henderson { 600d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 601d0ec9796SRichard Henderson int shift = simd_data(desc); 602d0ec9796SRichard Henderson intptr_t i; 603d0ec9796SRichard Henderson 6046c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 6056c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift; 606d0ec9796SRichard Henderson } 607d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 608d0ec9796SRichard Henderson } 609d0ec9796SRichard Henderson 610d0ec9796SRichard Henderson void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) 611d0ec9796SRichard Henderson { 612d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 613d0ec9796SRichard Henderson int shift = simd_data(desc); 614d0ec9796SRichard Henderson intptr_t i; 615d0ec9796SRichard Henderson 6166c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 6176c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift; 618d0ec9796SRichard Henderson } 619d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 620d0ec9796SRichard Henderson } 621d0ec9796SRichard Henderson 622d0ec9796SRichard Henderson void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) 623d0ec9796SRichard Henderson { 624d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 625d0ec9796SRichard Henderson int shift = simd_data(desc); 626d0ec9796SRichard Henderson intptr_t i; 627d0ec9796SRichard Henderson 6286c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 6296c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift; 630d0ec9796SRichard Henderson } 631d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 632d0ec9796SRichard Henderson } 633d0ec9796SRichard Henderson 634d0ec9796SRichard Henderson void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc) 635d0ec9796SRichard Henderson { 636d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 637d0ec9796SRichard Henderson int shift = simd_data(desc); 638d0ec9796SRichard Henderson intptr_t i; 639d0ec9796SRichard Henderson 6406c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 6416c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift; 642d0ec9796SRichard Henderson } 643d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 644d0ec9796SRichard Henderson } 645d0ec9796SRichard Henderson 646d0ec9796SRichard Henderson void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) 647d0ec9796SRichard Henderson { 648d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 649d0ec9796SRichard Henderson int shift = simd_data(desc); 650d0ec9796SRichard Henderson intptr_t i; 651d0ec9796SRichard Henderson 6526c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 6536c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift; 654d0ec9796SRichard Henderson } 655d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 656d0ec9796SRichard Henderson } 657d0ec9796SRichard Henderson 658d0ec9796SRichard Henderson void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) 659d0ec9796SRichard Henderson { 660d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 661d0ec9796SRichard Henderson int shift = simd_data(desc); 662d0ec9796SRichard Henderson intptr_t i; 663d0ec9796SRichard Henderson 6646c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 6656c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift; 666d0ec9796SRichard Henderson } 667d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 668d0ec9796SRichard Henderson } 669d0ec9796SRichard Henderson 670d0ec9796SRichard Henderson void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) 671d0ec9796SRichard Henderson { 672d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 673d0ec9796SRichard Henderson int shift = simd_data(desc); 674d0ec9796SRichard Henderson intptr_t i; 675d0ec9796SRichard Henderson 6766c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 6776c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift; 678d0ec9796SRichard Henderson } 679d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 680d0ec9796SRichard Henderson } 681d0ec9796SRichard Henderson 682d0ec9796SRichard Henderson void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) 683d0ec9796SRichard Henderson { 684d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 685d0ec9796SRichard Henderson int shift = simd_data(desc); 686d0ec9796SRichard Henderson intptr_t i; 687d0ec9796SRichard Henderson 6886c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 6896c7ab301SRichard Henderson *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift; 690d0ec9796SRichard Henderson } 691d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 692d0ec9796SRichard Henderson } 693d0ec9796SRichard Henderson 694d0ec9796SRichard Henderson void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) 695d0ec9796SRichard Henderson { 696d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 697d0ec9796SRichard Henderson int shift = simd_data(desc); 698d0ec9796SRichard Henderson intptr_t i; 699d0ec9796SRichard Henderson 7006c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 7016c7ab301SRichard Henderson *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift; 702d0ec9796SRichard Henderson } 703d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 704d0ec9796SRichard Henderson } 705d0ec9796SRichard Henderson 706d0ec9796SRichard Henderson void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) 707d0ec9796SRichard Henderson { 708d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 709d0ec9796SRichard Henderson int shift = simd_data(desc); 710d0ec9796SRichard Henderson intptr_t i; 711d0ec9796SRichard Henderson 7126c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 7136c7ab301SRichard Henderson *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift; 714d0ec9796SRichard Henderson } 715d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 716d0ec9796SRichard Henderson } 717d0ec9796SRichard Henderson 718d0ec9796SRichard Henderson void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) 719d0ec9796SRichard Henderson { 720d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 721d0ec9796SRichard Henderson int shift = simd_data(desc); 722d0ec9796SRichard Henderson intptr_t i; 723d0ec9796SRichard Henderson 7246c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 7256c7ab301SRichard Henderson *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift; 726d0ec9796SRichard Henderson } 727d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 728d0ec9796SRichard Henderson } 729212be173SRichard Henderson 730b0f7e744SRichard Henderson void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc) 731b0f7e744SRichard Henderson { 732b0f7e744SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 733b0f7e744SRichard Henderson int shift = simd_data(desc); 734b0f7e744SRichard Henderson intptr_t i; 735b0f7e744SRichard Henderson 736b0f7e744SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 737b0f7e744SRichard Henderson *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift); 738b0f7e744SRichard Henderson } 739b0f7e744SRichard Henderson clear_high(d, oprsz, desc); 740b0f7e744SRichard Henderson } 741b0f7e744SRichard Henderson 742b0f7e744SRichard Henderson void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc) 743b0f7e744SRichard Henderson { 744b0f7e744SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 745b0f7e744SRichard Henderson int shift = simd_data(desc); 746b0f7e744SRichard Henderson intptr_t i; 747b0f7e744SRichard Henderson 748b0f7e744SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 749b0f7e744SRichard Henderson *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift); 750b0f7e744SRichard Henderson } 751b0f7e744SRichard Henderson clear_high(d, oprsz, desc); 752b0f7e744SRichard Henderson } 753b0f7e744SRichard Henderson 754b0f7e744SRichard Henderson void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc) 755b0f7e744SRichard Henderson { 756b0f7e744SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 757b0f7e744SRichard Henderson int shift = simd_data(desc); 758b0f7e744SRichard Henderson intptr_t i; 759b0f7e744SRichard Henderson 760b0f7e744SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 761b0f7e744SRichard Henderson *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift); 762b0f7e744SRichard Henderson } 763b0f7e744SRichard Henderson clear_high(d, oprsz, desc); 764b0f7e744SRichard Henderson } 765b0f7e744SRichard Henderson 766b0f7e744SRichard Henderson void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc) 767b0f7e744SRichard Henderson { 768b0f7e744SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 769b0f7e744SRichard Henderson int shift = simd_data(desc); 770b0f7e744SRichard Henderson intptr_t i; 771b0f7e744SRichard Henderson 772b0f7e744SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 773b0f7e744SRichard Henderson *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift); 774b0f7e744SRichard Henderson } 775b0f7e744SRichard Henderson clear_high(d, oprsz, desc); 776b0f7e744SRichard Henderson } 777b0f7e744SRichard Henderson 7785ee5c14cSRichard Henderson void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) 7795ee5c14cSRichard Henderson { 7805ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 7815ee5c14cSRichard Henderson intptr_t i; 7825ee5c14cSRichard Henderson 7835ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 7845ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 7855ee5c14cSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh; 7865ee5c14cSRichard Henderson } 7875ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 7885ee5c14cSRichard Henderson } 7895ee5c14cSRichard Henderson 7905ee5c14cSRichard Henderson void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc) 7915ee5c14cSRichard Henderson { 7925ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 7935ee5c14cSRichard Henderson intptr_t i; 7945ee5c14cSRichard Henderson 7955ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 7965ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 7975ee5c14cSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh; 7985ee5c14cSRichard Henderson } 7995ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8005ee5c14cSRichard Henderson } 8015ee5c14cSRichard Henderson 8025ee5c14cSRichard Henderson void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc) 8035ee5c14cSRichard Henderson { 8045ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8055ee5c14cSRichard Henderson intptr_t i; 8065ee5c14cSRichard Henderson 8075ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 8085ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 8095ee5c14cSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh; 8105ee5c14cSRichard Henderson } 8115ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8125ee5c14cSRichard Henderson } 8135ee5c14cSRichard Henderson 8145ee5c14cSRichard Henderson void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc) 8155ee5c14cSRichard Henderson { 8165ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8175ee5c14cSRichard Henderson intptr_t i; 8185ee5c14cSRichard Henderson 8195ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 8205ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 8215ee5c14cSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh; 8225ee5c14cSRichard Henderson } 8235ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8245ee5c14cSRichard Henderson } 8255ee5c14cSRichard Henderson 8265ee5c14cSRichard Henderson void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc) 8275ee5c14cSRichard Henderson { 8285ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8295ee5c14cSRichard Henderson intptr_t i; 8305ee5c14cSRichard Henderson 8315ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 8325ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 8335ee5c14cSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh; 8345ee5c14cSRichard Henderson } 8355ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8365ee5c14cSRichard Henderson } 8375ee5c14cSRichard Henderson 8385ee5c14cSRichard Henderson void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc) 8395ee5c14cSRichard Henderson { 8405ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8415ee5c14cSRichard Henderson intptr_t i; 8425ee5c14cSRichard Henderson 8435ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 8445ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 8455ee5c14cSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh; 8465ee5c14cSRichard Henderson } 8475ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8485ee5c14cSRichard Henderson } 8495ee5c14cSRichard Henderson 8505ee5c14cSRichard Henderson void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc) 8515ee5c14cSRichard Henderson { 8525ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8535ee5c14cSRichard Henderson intptr_t i; 8545ee5c14cSRichard Henderson 8555ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 8565ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 8575ee5c14cSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh; 8585ee5c14cSRichard Henderson } 8595ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8605ee5c14cSRichard Henderson } 8615ee5c14cSRichard Henderson 8625ee5c14cSRichard Henderson void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc) 8635ee5c14cSRichard Henderson { 8645ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8655ee5c14cSRichard Henderson intptr_t i; 8665ee5c14cSRichard Henderson 8675ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 8685ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 8695ee5c14cSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh; 8705ee5c14cSRichard Henderson } 8715ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8725ee5c14cSRichard Henderson } 8735ee5c14cSRichard Henderson 8745ee5c14cSRichard Henderson void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc) 8755ee5c14cSRichard Henderson { 8765ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8775ee5c14cSRichard Henderson intptr_t i; 8785ee5c14cSRichard Henderson 879899f08adSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 8805ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 8815ee5c14cSRichard Henderson *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh; 8825ee5c14cSRichard Henderson } 8835ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8845ee5c14cSRichard Henderson } 8855ee5c14cSRichard Henderson 8865ee5c14cSRichard Henderson void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc) 8875ee5c14cSRichard Henderson { 8885ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8895ee5c14cSRichard Henderson intptr_t i; 8905ee5c14cSRichard Henderson 8915ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 8925ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 8935ee5c14cSRichard Henderson *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh; 8945ee5c14cSRichard Henderson } 8955ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8965ee5c14cSRichard Henderson } 8975ee5c14cSRichard Henderson 8985ee5c14cSRichard Henderson void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc) 8995ee5c14cSRichard Henderson { 9005ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 9015ee5c14cSRichard Henderson intptr_t i; 9025ee5c14cSRichard Henderson 903899f08adSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 9045ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 9055ee5c14cSRichard Henderson *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh; 9065ee5c14cSRichard Henderson } 9075ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 9085ee5c14cSRichard Henderson } 9095ee5c14cSRichard Henderson 9105ee5c14cSRichard Henderson void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) 9115ee5c14cSRichard Henderson { 9125ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 9135ee5c14cSRichard Henderson intptr_t i; 9145ee5c14cSRichard Henderson 915899f08adSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 9165ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 9175ee5c14cSRichard Henderson *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh; 9185ee5c14cSRichard Henderson } 9195ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 9205ee5c14cSRichard Henderson } 9215ee5c14cSRichard Henderson 9225d0ceda9SRichard Henderson void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc) 9235d0ceda9SRichard Henderson { 9245d0ceda9SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 9255d0ceda9SRichard Henderson intptr_t i; 9265d0ceda9SRichard Henderson 9275d0ceda9SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 9285d0ceda9SRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 9295d0ceda9SRichard Henderson *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh); 9305d0ceda9SRichard Henderson } 9315d0ceda9SRichard Henderson clear_high(d, oprsz, desc); 9325d0ceda9SRichard Henderson } 9335d0ceda9SRichard Henderson 9345d0ceda9SRichard Henderson void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc) 9355d0ceda9SRichard Henderson { 9365d0ceda9SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 9375d0ceda9SRichard Henderson intptr_t i; 9385d0ceda9SRichard Henderson 9395d0ceda9SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 9405d0ceda9SRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 9415d0ceda9SRichard Henderson *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh); 9425d0ceda9SRichard Henderson } 9435d0ceda9SRichard Henderson clear_high(d, oprsz, desc); 9445d0ceda9SRichard Henderson } 9455d0ceda9SRichard Henderson 9465d0ceda9SRichard Henderson void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc) 9475d0ceda9SRichard Henderson { 9485d0ceda9SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 9495d0ceda9SRichard Henderson intptr_t i; 9505d0ceda9SRichard Henderson 9515d0ceda9SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 9525d0ceda9SRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 9535d0ceda9SRichard Henderson *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh); 9545d0ceda9SRichard Henderson } 9555d0ceda9SRichard Henderson clear_high(d, oprsz, desc); 9565d0ceda9SRichard Henderson } 9575d0ceda9SRichard Henderson 9585d0ceda9SRichard Henderson void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc) 9595d0ceda9SRichard Henderson { 9605d0ceda9SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 9615d0ceda9SRichard Henderson intptr_t i; 9625d0ceda9SRichard Henderson 9635d0ceda9SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 9645d0ceda9SRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 9655d0ceda9SRichard Henderson *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh); 9665d0ceda9SRichard Henderson } 9675d0ceda9SRichard Henderson clear_high(d, oprsz, desc); 9685d0ceda9SRichard Henderson } 9695d0ceda9SRichard Henderson 9705d0ceda9SRichard Henderson void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc) 9715d0ceda9SRichard Henderson { 9725d0ceda9SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 9735d0ceda9SRichard Henderson intptr_t i; 9745d0ceda9SRichard Henderson 9755d0ceda9SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 9765d0ceda9SRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 9775d0ceda9SRichard Henderson *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh); 9785d0ceda9SRichard Henderson } 9795d0ceda9SRichard Henderson clear_high(d, oprsz, desc); 9805d0ceda9SRichard Henderson } 9815d0ceda9SRichard Henderson 9825d0ceda9SRichard Henderson void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc) 9835d0ceda9SRichard Henderson { 9845d0ceda9SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 9855d0ceda9SRichard Henderson intptr_t i; 9865d0ceda9SRichard Henderson 9875d0ceda9SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 9885d0ceda9SRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 9895d0ceda9SRichard Henderson *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh); 9905d0ceda9SRichard Henderson } 9915d0ceda9SRichard Henderson clear_high(d, oprsz, desc); 9925d0ceda9SRichard Henderson } 9935d0ceda9SRichard Henderson 9945d0ceda9SRichard Henderson void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc) 9955d0ceda9SRichard Henderson { 9965d0ceda9SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 9975d0ceda9SRichard Henderson intptr_t i; 9985d0ceda9SRichard Henderson 9995d0ceda9SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 10005d0ceda9SRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 10015d0ceda9SRichard Henderson *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh); 10025d0ceda9SRichard Henderson } 10035d0ceda9SRichard Henderson clear_high(d, oprsz, desc); 10045d0ceda9SRichard Henderson } 10055d0ceda9SRichard Henderson 10065d0ceda9SRichard Henderson void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc) 10075d0ceda9SRichard Henderson { 10085d0ceda9SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 10095d0ceda9SRichard Henderson intptr_t i; 10105d0ceda9SRichard Henderson 10115d0ceda9SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 10125d0ceda9SRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 10135d0ceda9SRichard Henderson *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh); 10145d0ceda9SRichard Henderson } 10155d0ceda9SRichard Henderson clear_high(d, oprsz, desc); 10165d0ceda9SRichard Henderson } 10175d0ceda9SRichard Henderson 1018212be173SRichard Henderson #define DO_CMP1(NAME, TYPE, OP) \ 1019212be173SRichard Henderson void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ 1020212be173SRichard Henderson { \ 1021212be173SRichard Henderson intptr_t oprsz = simd_oprsz(desc); \ 1022212be173SRichard Henderson intptr_t i; \ 10236cb1d3b8SRichard Henderson for (i = 0; i < oprsz; i += sizeof(TYPE)) { \ 10240270bd50SRichard Henderson *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \ 1025212be173SRichard Henderson } \ 1026212be173SRichard Henderson clear_high(d, oprsz, desc); \ 1027212be173SRichard Henderson } 1028212be173SRichard Henderson 1029212be173SRichard Henderson #define DO_CMP2(SZ) \ 10306c7ab301SRichard Henderson DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \ 10316c7ab301SRichard Henderson DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \ 10326c7ab301SRichard Henderson DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \ 10336c7ab301SRichard Henderson DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \ 10346c7ab301SRichard Henderson DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \ 10356c7ab301SRichard Henderson DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=) 1036212be173SRichard Henderson 1037212be173SRichard Henderson DO_CMP2(8) 1038212be173SRichard Henderson DO_CMP2(16) 1039212be173SRichard Henderson DO_CMP2(32) 1040212be173SRichard Henderson DO_CMP2(64) 1041212be173SRichard Henderson 1042212be173SRichard Henderson #undef DO_CMP1 1043212be173SRichard Henderson #undef DO_CMP2 1044f49b12c6SRichard Henderson 1045f49b12c6SRichard Henderson void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc) 1046f49b12c6SRichard Henderson { 1047f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1048f49b12c6SRichard Henderson intptr_t i; 1049f49b12c6SRichard Henderson 1050f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1051f49b12c6SRichard Henderson int r = *(int8_t *)(a + i) + *(int8_t *)(b + i); 1052f49b12c6SRichard Henderson if (r > INT8_MAX) { 1053f49b12c6SRichard Henderson r = INT8_MAX; 1054f49b12c6SRichard Henderson } else if (r < INT8_MIN) { 1055f49b12c6SRichard Henderson r = INT8_MIN; 1056f49b12c6SRichard Henderson } 1057f49b12c6SRichard Henderson *(int8_t *)(d + i) = r; 1058f49b12c6SRichard Henderson } 1059f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1060f49b12c6SRichard Henderson } 1061f49b12c6SRichard Henderson 1062f49b12c6SRichard Henderson void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc) 1063f49b12c6SRichard Henderson { 1064f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1065f49b12c6SRichard Henderson intptr_t i; 1066f49b12c6SRichard Henderson 1067f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1068f49b12c6SRichard Henderson int r = *(int16_t *)(a + i) + *(int16_t *)(b + i); 1069f49b12c6SRichard Henderson if (r > INT16_MAX) { 1070f49b12c6SRichard Henderson r = INT16_MAX; 1071f49b12c6SRichard Henderson } else if (r < INT16_MIN) { 1072f49b12c6SRichard Henderson r = INT16_MIN; 1073f49b12c6SRichard Henderson } 1074f49b12c6SRichard Henderson *(int16_t *)(d + i) = r; 1075f49b12c6SRichard Henderson } 1076f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1077f49b12c6SRichard Henderson } 1078f49b12c6SRichard Henderson 1079f49b12c6SRichard Henderson void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc) 1080f49b12c6SRichard Henderson { 1081f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1082f49b12c6SRichard Henderson intptr_t i; 1083f49b12c6SRichard Henderson 1084f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1085f49b12c6SRichard Henderson int32_t ai = *(int32_t *)(a + i); 1086f49b12c6SRichard Henderson int32_t bi = *(int32_t *)(b + i); 10877702a855SRichard Henderson int32_t di; 10887702a855SRichard Henderson if (sadd32_overflow(ai, bi, &di)) { 1089f49b12c6SRichard Henderson di = (di < 0 ? INT32_MAX : INT32_MIN); 1090f49b12c6SRichard Henderson } 1091f49b12c6SRichard Henderson *(int32_t *)(d + i) = di; 1092f49b12c6SRichard Henderson } 1093f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1094f49b12c6SRichard Henderson } 1095f49b12c6SRichard Henderson 1096f49b12c6SRichard Henderson void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc) 1097f49b12c6SRichard Henderson { 1098f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1099f49b12c6SRichard Henderson intptr_t i; 1100f49b12c6SRichard Henderson 1101f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1102f49b12c6SRichard Henderson int64_t ai = *(int64_t *)(a + i); 1103f49b12c6SRichard Henderson int64_t bi = *(int64_t *)(b + i); 11047702a855SRichard Henderson int64_t di; 11057702a855SRichard Henderson if (sadd64_overflow(ai, bi, &di)) { 1106f49b12c6SRichard Henderson di = (di < 0 ? INT64_MAX : INT64_MIN); 1107f49b12c6SRichard Henderson } 1108f49b12c6SRichard Henderson *(int64_t *)(d + i) = di; 1109f49b12c6SRichard Henderson } 1110f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1111f49b12c6SRichard Henderson } 1112f49b12c6SRichard Henderson 1113f49b12c6SRichard Henderson void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc) 1114f49b12c6SRichard Henderson { 1115f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1116f49b12c6SRichard Henderson intptr_t i; 1117f49b12c6SRichard Henderson 1118f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1119f49b12c6SRichard Henderson int r = *(int8_t *)(a + i) - *(int8_t *)(b + i); 1120f49b12c6SRichard Henderson if (r > INT8_MAX) { 1121f49b12c6SRichard Henderson r = INT8_MAX; 1122f49b12c6SRichard Henderson } else if (r < INT8_MIN) { 1123f49b12c6SRichard Henderson r = INT8_MIN; 1124f49b12c6SRichard Henderson } 1125f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 1126f49b12c6SRichard Henderson } 1127f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1128f49b12c6SRichard Henderson } 1129f49b12c6SRichard Henderson 1130f49b12c6SRichard Henderson void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc) 1131f49b12c6SRichard Henderson { 1132f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1133f49b12c6SRichard Henderson intptr_t i; 1134f49b12c6SRichard Henderson 1135f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1136f49b12c6SRichard Henderson int r = *(int16_t *)(a + i) - *(int16_t *)(b + i); 1137f49b12c6SRichard Henderson if (r > INT16_MAX) { 1138f49b12c6SRichard Henderson r = INT16_MAX; 1139f49b12c6SRichard Henderson } else if (r < INT16_MIN) { 1140f49b12c6SRichard Henderson r = INT16_MIN; 1141f49b12c6SRichard Henderson } 1142f49b12c6SRichard Henderson *(int16_t *)(d + i) = r; 1143f49b12c6SRichard Henderson } 1144f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1145f49b12c6SRichard Henderson } 1146f49b12c6SRichard Henderson 1147f49b12c6SRichard Henderson void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc) 1148f49b12c6SRichard Henderson { 1149f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1150f49b12c6SRichard Henderson intptr_t i; 1151f49b12c6SRichard Henderson 1152f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1153f49b12c6SRichard Henderson int32_t ai = *(int32_t *)(a + i); 1154f49b12c6SRichard Henderson int32_t bi = *(int32_t *)(b + i); 11557702a855SRichard Henderson int32_t di; 11567702a855SRichard Henderson if (ssub32_overflow(ai, bi, &di)) { 1157f49b12c6SRichard Henderson di = (di < 0 ? INT32_MAX : INT32_MIN); 1158f49b12c6SRichard Henderson } 1159f49b12c6SRichard Henderson *(int32_t *)(d + i) = di; 1160f49b12c6SRichard Henderson } 1161f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1162f49b12c6SRichard Henderson } 1163f49b12c6SRichard Henderson 1164f49b12c6SRichard Henderson void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc) 1165f49b12c6SRichard Henderson { 1166f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1167f49b12c6SRichard Henderson intptr_t i; 1168f49b12c6SRichard Henderson 1169f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1170f49b12c6SRichard Henderson int64_t ai = *(int64_t *)(a + i); 1171f49b12c6SRichard Henderson int64_t bi = *(int64_t *)(b + i); 11727702a855SRichard Henderson int64_t di; 11737702a855SRichard Henderson if (ssub64_overflow(ai, bi, &di)) { 1174f49b12c6SRichard Henderson di = (di < 0 ? INT64_MAX : INT64_MIN); 1175f49b12c6SRichard Henderson } 1176f49b12c6SRichard Henderson *(int64_t *)(d + i) = di; 1177f49b12c6SRichard Henderson } 1178f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1179f49b12c6SRichard Henderson } 1180f49b12c6SRichard Henderson 1181f49b12c6SRichard Henderson void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc) 1182f49b12c6SRichard Henderson { 1183f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1184f49b12c6SRichard Henderson intptr_t i; 1185f49b12c6SRichard Henderson 1186f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1187f49b12c6SRichard Henderson unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 1188f49b12c6SRichard Henderson if (r > UINT8_MAX) { 1189f49b12c6SRichard Henderson r = UINT8_MAX; 1190f49b12c6SRichard Henderson } 1191f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 1192f49b12c6SRichard Henderson } 1193f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1194f49b12c6SRichard Henderson } 1195f49b12c6SRichard Henderson 1196f49b12c6SRichard Henderson void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc) 1197f49b12c6SRichard Henderson { 1198f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1199f49b12c6SRichard Henderson intptr_t i; 1200f49b12c6SRichard Henderson 1201f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1202f49b12c6SRichard Henderson unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 1203f49b12c6SRichard Henderson if (r > UINT16_MAX) { 1204f49b12c6SRichard Henderson r = UINT16_MAX; 1205f49b12c6SRichard Henderson } 1206f49b12c6SRichard Henderson *(uint16_t *)(d + i) = r; 1207f49b12c6SRichard Henderson } 1208f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1209f49b12c6SRichard Henderson } 1210f49b12c6SRichard Henderson 1211f49b12c6SRichard Henderson void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc) 1212f49b12c6SRichard Henderson { 1213f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1214f49b12c6SRichard Henderson intptr_t i; 1215f49b12c6SRichard Henderson 1216f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1217f49b12c6SRichard Henderson uint32_t ai = *(uint32_t *)(a + i); 1218f49b12c6SRichard Henderson uint32_t bi = *(uint32_t *)(b + i); 12197702a855SRichard Henderson uint32_t di; 12207702a855SRichard Henderson if (uadd32_overflow(ai, bi, &di)) { 1221f49b12c6SRichard Henderson di = UINT32_MAX; 1222f49b12c6SRichard Henderson } 1223f49b12c6SRichard Henderson *(uint32_t *)(d + i) = di; 1224f49b12c6SRichard Henderson } 1225f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1226f49b12c6SRichard Henderson } 1227f49b12c6SRichard Henderson 1228f49b12c6SRichard Henderson void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc) 1229f49b12c6SRichard Henderson { 1230f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1231f49b12c6SRichard Henderson intptr_t i; 1232f49b12c6SRichard Henderson 1233f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1234f49b12c6SRichard Henderson uint64_t ai = *(uint64_t *)(a + i); 1235f49b12c6SRichard Henderson uint64_t bi = *(uint64_t *)(b + i); 12367702a855SRichard Henderson uint64_t di; 12377702a855SRichard Henderson if (uadd64_overflow(ai, bi, &di)) { 1238f49b12c6SRichard Henderson di = UINT64_MAX; 1239f49b12c6SRichard Henderson } 1240f49b12c6SRichard Henderson *(uint64_t *)(d + i) = di; 1241f49b12c6SRichard Henderson } 1242f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1243f49b12c6SRichard Henderson } 1244f49b12c6SRichard Henderson 1245f49b12c6SRichard Henderson void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc) 1246f49b12c6SRichard Henderson { 1247f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1248f49b12c6SRichard Henderson intptr_t i; 1249f49b12c6SRichard Henderson 1250f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1251f49b12c6SRichard Henderson int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 1252f49b12c6SRichard Henderson if (r < 0) { 1253f49b12c6SRichard Henderson r = 0; 1254f49b12c6SRichard Henderson } 1255f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 1256f49b12c6SRichard Henderson } 1257f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1258f49b12c6SRichard Henderson } 1259f49b12c6SRichard Henderson 1260f49b12c6SRichard Henderson void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc) 1261f49b12c6SRichard Henderson { 1262f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1263f49b12c6SRichard Henderson intptr_t i; 1264f49b12c6SRichard Henderson 1265f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1266f49b12c6SRichard Henderson int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 1267f49b12c6SRichard Henderson if (r < 0) { 1268f49b12c6SRichard Henderson r = 0; 1269f49b12c6SRichard Henderson } 1270f49b12c6SRichard Henderson *(uint16_t *)(d + i) = r; 1271f49b12c6SRichard Henderson } 1272f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1273f49b12c6SRichard Henderson } 1274f49b12c6SRichard Henderson 1275f49b12c6SRichard Henderson void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc) 1276f49b12c6SRichard Henderson { 1277f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1278f49b12c6SRichard Henderson intptr_t i; 1279f49b12c6SRichard Henderson 1280f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1281f49b12c6SRichard Henderson uint32_t ai = *(uint32_t *)(a + i); 1282f49b12c6SRichard Henderson uint32_t bi = *(uint32_t *)(b + i); 12837702a855SRichard Henderson uint32_t di; 12847702a855SRichard Henderson if (usub32_overflow(ai, bi, &di)) { 1285f49b12c6SRichard Henderson di = 0; 1286f49b12c6SRichard Henderson } 1287f49b12c6SRichard Henderson *(uint32_t *)(d + i) = di; 1288f49b12c6SRichard Henderson } 1289f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1290f49b12c6SRichard Henderson } 1291f49b12c6SRichard Henderson 1292f49b12c6SRichard Henderson void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc) 1293f49b12c6SRichard Henderson { 1294f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1295f49b12c6SRichard Henderson intptr_t i; 1296f49b12c6SRichard Henderson 1297f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1298f49b12c6SRichard Henderson uint64_t ai = *(uint64_t *)(a + i); 1299f49b12c6SRichard Henderson uint64_t bi = *(uint64_t *)(b + i); 13007702a855SRichard Henderson uint64_t di; 13017702a855SRichard Henderson if (usub64_overflow(ai, bi, &di)) { 1302f49b12c6SRichard Henderson di = 0; 1303f49b12c6SRichard Henderson } 1304f49b12c6SRichard Henderson *(uint64_t *)(d + i) = di; 1305f49b12c6SRichard Henderson } 1306f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1307f49b12c6SRichard Henderson } 1308dd0a0fcdSRichard Henderson 1309dd0a0fcdSRichard Henderson void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc) 1310dd0a0fcdSRichard Henderson { 1311dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1312dd0a0fcdSRichard Henderson intptr_t i; 1313dd0a0fcdSRichard Henderson 1314dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1315dd0a0fcdSRichard Henderson int8_t aa = *(int8_t *)(a + i); 1316dd0a0fcdSRichard Henderson int8_t bb = *(int8_t *)(b + i); 1317dd0a0fcdSRichard Henderson int8_t dd = aa < bb ? aa : bb; 1318dd0a0fcdSRichard Henderson *(int8_t *)(d + i) = dd; 1319dd0a0fcdSRichard Henderson } 1320dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1321dd0a0fcdSRichard Henderson } 1322dd0a0fcdSRichard Henderson 1323dd0a0fcdSRichard Henderson void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc) 1324dd0a0fcdSRichard Henderson { 1325dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1326dd0a0fcdSRichard Henderson intptr_t i; 1327dd0a0fcdSRichard Henderson 1328dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1329dd0a0fcdSRichard Henderson int16_t aa = *(int16_t *)(a + i); 1330dd0a0fcdSRichard Henderson int16_t bb = *(int16_t *)(b + i); 1331dd0a0fcdSRichard Henderson int16_t dd = aa < bb ? aa : bb; 1332dd0a0fcdSRichard Henderson *(int16_t *)(d + i) = dd; 1333dd0a0fcdSRichard Henderson } 1334dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1335dd0a0fcdSRichard Henderson } 1336dd0a0fcdSRichard Henderson 1337dd0a0fcdSRichard Henderson void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc) 1338dd0a0fcdSRichard Henderson { 1339dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1340dd0a0fcdSRichard Henderson intptr_t i; 1341dd0a0fcdSRichard Henderson 1342dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1343dd0a0fcdSRichard Henderson int32_t aa = *(int32_t *)(a + i); 1344dd0a0fcdSRichard Henderson int32_t bb = *(int32_t *)(b + i); 1345dd0a0fcdSRichard Henderson int32_t dd = aa < bb ? aa : bb; 1346dd0a0fcdSRichard Henderson *(int32_t *)(d + i) = dd; 1347dd0a0fcdSRichard Henderson } 1348dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1349dd0a0fcdSRichard Henderson } 1350dd0a0fcdSRichard Henderson 1351dd0a0fcdSRichard Henderson void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc) 1352dd0a0fcdSRichard Henderson { 1353dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1354dd0a0fcdSRichard Henderson intptr_t i; 1355dd0a0fcdSRichard Henderson 1356dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1357dd0a0fcdSRichard Henderson int64_t aa = *(int64_t *)(a + i); 1358dd0a0fcdSRichard Henderson int64_t bb = *(int64_t *)(b + i); 1359dd0a0fcdSRichard Henderson int64_t dd = aa < bb ? aa : bb; 1360dd0a0fcdSRichard Henderson *(int64_t *)(d + i) = dd; 1361dd0a0fcdSRichard Henderson } 1362dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1363dd0a0fcdSRichard Henderson } 1364dd0a0fcdSRichard Henderson 1365dd0a0fcdSRichard Henderson void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc) 1366dd0a0fcdSRichard Henderson { 1367dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1368dd0a0fcdSRichard Henderson intptr_t i; 1369dd0a0fcdSRichard Henderson 1370dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1371dd0a0fcdSRichard Henderson int8_t aa = *(int8_t *)(a + i); 1372dd0a0fcdSRichard Henderson int8_t bb = *(int8_t *)(b + i); 1373dd0a0fcdSRichard Henderson int8_t dd = aa > bb ? aa : bb; 1374dd0a0fcdSRichard Henderson *(int8_t *)(d + i) = dd; 1375dd0a0fcdSRichard Henderson } 1376dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1377dd0a0fcdSRichard Henderson } 1378dd0a0fcdSRichard Henderson 1379dd0a0fcdSRichard Henderson void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc) 1380dd0a0fcdSRichard Henderson { 1381dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1382dd0a0fcdSRichard Henderson intptr_t i; 1383dd0a0fcdSRichard Henderson 1384dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1385dd0a0fcdSRichard Henderson int16_t aa = *(int16_t *)(a + i); 1386dd0a0fcdSRichard Henderson int16_t bb = *(int16_t *)(b + i); 1387dd0a0fcdSRichard Henderson int16_t dd = aa > bb ? aa : bb; 1388dd0a0fcdSRichard Henderson *(int16_t *)(d + i) = dd; 1389dd0a0fcdSRichard Henderson } 1390dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1391dd0a0fcdSRichard Henderson } 1392dd0a0fcdSRichard Henderson 1393dd0a0fcdSRichard Henderson void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc) 1394dd0a0fcdSRichard Henderson { 1395dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1396dd0a0fcdSRichard Henderson intptr_t i; 1397dd0a0fcdSRichard Henderson 1398dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1399dd0a0fcdSRichard Henderson int32_t aa = *(int32_t *)(a + i); 1400dd0a0fcdSRichard Henderson int32_t bb = *(int32_t *)(b + i); 1401dd0a0fcdSRichard Henderson int32_t dd = aa > bb ? aa : bb; 1402dd0a0fcdSRichard Henderson *(int32_t *)(d + i) = dd; 1403dd0a0fcdSRichard Henderson } 1404dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1405dd0a0fcdSRichard Henderson } 1406dd0a0fcdSRichard Henderson 1407dd0a0fcdSRichard Henderson void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc) 1408dd0a0fcdSRichard Henderson { 1409dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1410dd0a0fcdSRichard Henderson intptr_t i; 1411dd0a0fcdSRichard Henderson 1412dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1413dd0a0fcdSRichard Henderson int64_t aa = *(int64_t *)(a + i); 1414dd0a0fcdSRichard Henderson int64_t bb = *(int64_t *)(b + i); 1415dd0a0fcdSRichard Henderson int64_t dd = aa > bb ? aa : bb; 1416dd0a0fcdSRichard Henderson *(int64_t *)(d + i) = dd; 1417dd0a0fcdSRichard Henderson } 1418dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1419dd0a0fcdSRichard Henderson } 1420dd0a0fcdSRichard Henderson 1421dd0a0fcdSRichard Henderson void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc) 1422dd0a0fcdSRichard Henderson { 1423dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1424dd0a0fcdSRichard Henderson intptr_t i; 1425dd0a0fcdSRichard Henderson 1426dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1427dd0a0fcdSRichard Henderson uint8_t aa = *(uint8_t *)(a + i); 1428dd0a0fcdSRichard Henderson uint8_t bb = *(uint8_t *)(b + i); 1429dd0a0fcdSRichard Henderson uint8_t dd = aa < bb ? aa : bb; 1430dd0a0fcdSRichard Henderson *(uint8_t *)(d + i) = dd; 1431dd0a0fcdSRichard Henderson } 1432dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1433dd0a0fcdSRichard Henderson } 1434dd0a0fcdSRichard Henderson 1435dd0a0fcdSRichard Henderson void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc) 1436dd0a0fcdSRichard Henderson { 1437dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1438dd0a0fcdSRichard Henderson intptr_t i; 1439dd0a0fcdSRichard Henderson 1440dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1441dd0a0fcdSRichard Henderson uint16_t aa = *(uint16_t *)(a + i); 1442dd0a0fcdSRichard Henderson uint16_t bb = *(uint16_t *)(b + i); 1443dd0a0fcdSRichard Henderson uint16_t dd = aa < bb ? aa : bb; 1444dd0a0fcdSRichard Henderson *(uint16_t *)(d + i) = dd; 1445dd0a0fcdSRichard Henderson } 1446dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1447dd0a0fcdSRichard Henderson } 1448dd0a0fcdSRichard Henderson 1449dd0a0fcdSRichard Henderson void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc) 1450dd0a0fcdSRichard Henderson { 1451dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1452dd0a0fcdSRichard Henderson intptr_t i; 1453dd0a0fcdSRichard Henderson 1454dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1455dd0a0fcdSRichard Henderson uint32_t aa = *(uint32_t *)(a + i); 1456dd0a0fcdSRichard Henderson uint32_t bb = *(uint32_t *)(b + i); 1457dd0a0fcdSRichard Henderson uint32_t dd = aa < bb ? aa : bb; 1458dd0a0fcdSRichard Henderson *(uint32_t *)(d + i) = dd; 1459dd0a0fcdSRichard Henderson } 1460dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1461dd0a0fcdSRichard Henderson } 1462dd0a0fcdSRichard Henderson 1463dd0a0fcdSRichard Henderson void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc) 1464dd0a0fcdSRichard Henderson { 1465dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1466dd0a0fcdSRichard Henderson intptr_t i; 1467dd0a0fcdSRichard Henderson 1468dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1469dd0a0fcdSRichard Henderson uint64_t aa = *(uint64_t *)(a + i); 1470dd0a0fcdSRichard Henderson uint64_t bb = *(uint64_t *)(b + i); 1471dd0a0fcdSRichard Henderson uint64_t dd = aa < bb ? aa : bb; 1472dd0a0fcdSRichard Henderson *(uint64_t *)(d + i) = dd; 1473dd0a0fcdSRichard Henderson } 1474dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1475dd0a0fcdSRichard Henderson } 1476dd0a0fcdSRichard Henderson 1477dd0a0fcdSRichard Henderson void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc) 1478dd0a0fcdSRichard Henderson { 1479dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1480dd0a0fcdSRichard Henderson intptr_t i; 1481dd0a0fcdSRichard Henderson 1482dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1483dd0a0fcdSRichard Henderson uint8_t aa = *(uint8_t *)(a + i); 1484dd0a0fcdSRichard Henderson uint8_t bb = *(uint8_t *)(b + i); 1485dd0a0fcdSRichard Henderson uint8_t dd = aa > bb ? aa : bb; 1486dd0a0fcdSRichard Henderson *(uint8_t *)(d + i) = dd; 1487dd0a0fcdSRichard Henderson } 1488dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1489dd0a0fcdSRichard Henderson } 1490dd0a0fcdSRichard Henderson 1491dd0a0fcdSRichard Henderson void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc) 1492dd0a0fcdSRichard Henderson { 1493dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1494dd0a0fcdSRichard Henderson intptr_t i; 1495dd0a0fcdSRichard Henderson 1496dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1497dd0a0fcdSRichard Henderson uint16_t aa = *(uint16_t *)(a + i); 1498dd0a0fcdSRichard Henderson uint16_t bb = *(uint16_t *)(b + i); 1499dd0a0fcdSRichard Henderson uint16_t dd = aa > bb ? aa : bb; 1500dd0a0fcdSRichard Henderson *(uint16_t *)(d + i) = dd; 1501dd0a0fcdSRichard Henderson } 1502dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1503dd0a0fcdSRichard Henderson } 1504dd0a0fcdSRichard Henderson 1505dd0a0fcdSRichard Henderson void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc) 1506dd0a0fcdSRichard Henderson { 1507dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1508dd0a0fcdSRichard Henderson intptr_t i; 1509dd0a0fcdSRichard Henderson 1510dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1511dd0a0fcdSRichard Henderson uint32_t aa = *(uint32_t *)(a + i); 1512dd0a0fcdSRichard Henderson uint32_t bb = *(uint32_t *)(b + i); 1513dd0a0fcdSRichard Henderson uint32_t dd = aa > bb ? aa : bb; 1514dd0a0fcdSRichard Henderson *(uint32_t *)(d + i) = dd; 1515dd0a0fcdSRichard Henderson } 1516dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1517dd0a0fcdSRichard Henderson } 1518dd0a0fcdSRichard Henderson 1519dd0a0fcdSRichard Henderson void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc) 1520dd0a0fcdSRichard Henderson { 1521dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1522dd0a0fcdSRichard Henderson intptr_t i; 1523dd0a0fcdSRichard Henderson 1524dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1525dd0a0fcdSRichard Henderson uint64_t aa = *(uint64_t *)(a + i); 1526dd0a0fcdSRichard Henderson uint64_t bb = *(uint64_t *)(b + i); 1527dd0a0fcdSRichard Henderson uint64_t dd = aa > bb ? aa : bb; 1528dd0a0fcdSRichard Henderson *(uint64_t *)(d + i) = dd; 1529dd0a0fcdSRichard Henderson } 1530dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1531dd0a0fcdSRichard Henderson } 153238dc1294SRichard Henderson 153338dc1294SRichard Henderson void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc) 153438dc1294SRichard Henderson { 153538dc1294SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 153638dc1294SRichard Henderson intptr_t i; 153738dc1294SRichard Henderson 15386c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 15396c7ab301SRichard Henderson uint64_t aa = *(uint64_t *)(a + i); 15406c7ab301SRichard Henderson uint64_t bb = *(uint64_t *)(b + i); 15416c7ab301SRichard Henderson uint64_t cc = *(uint64_t *)(c + i); 15426c7ab301SRichard Henderson *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa); 154338dc1294SRichard Henderson } 154438dc1294SRichard Henderson clear_high(d, oprsz, desc); 154538dc1294SRichard Henderson } 1546