1db432672SRichard Henderson /* 2db432672SRichard Henderson * Generic vectorized operation runtime 3db432672SRichard Henderson * 4db432672SRichard Henderson * Copyright (c) 2018 Linaro 5db432672SRichard Henderson * 6db432672SRichard Henderson * This library is free software; you can redistribute it and/or 7db432672SRichard Henderson * modify it under the terms of the GNU Lesser General Public 8db432672SRichard Henderson * License as published by the Free Software Foundation; either 9fb0343d5SThomas Huth * version 2.1 of the License, or (at your option) any later version. 10db432672SRichard Henderson * 11db432672SRichard Henderson * This library is distributed in the hope that it will be useful, 12db432672SRichard Henderson * but WITHOUT ANY WARRANTY; without even the implied warranty of 13db432672SRichard Henderson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14db432672SRichard Henderson * Lesser General Public License for more details. 15db432672SRichard Henderson * 16db432672SRichard Henderson * You should have received a copy of the GNU Lesser General Public 17db432672SRichard Henderson * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18db432672SRichard Henderson */ 19db432672SRichard Henderson 20db432672SRichard Henderson #include "qemu/osdep.h" 21db432672SRichard Henderson #include "qemu/host-utils.h" 22db432672SRichard Henderson #include "cpu.h" 23db432672SRichard Henderson #include "exec/helper-proto.h" 24dcb32f1dSPhilippe Mathieu-Daudé #include "tcg/tcg-gvec-desc.h" 25db432672SRichard Henderson 26db432672SRichard Henderson 27db432672SRichard Henderson static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc) 28db432672SRichard Henderson { 29db432672SRichard Henderson intptr_t maxsz = simd_maxsz(desc); 30db432672SRichard Henderson intptr_t i; 31db432672SRichard Henderson 32db432672SRichard Henderson if (unlikely(maxsz > oprsz)) { 33db432672SRichard Henderson for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) { 34db432672SRichard Henderson *(uint64_t *)(d + i) = 0; 35db432672SRichard Henderson } 36db432672SRichard Henderson } 37db432672SRichard Henderson } 38db432672SRichard Henderson 39db432672SRichard Henderson void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc) 40db432672SRichard Henderson { 41db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 42db432672SRichard Henderson intptr_t i; 43db432672SRichard Henderson 446c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 456c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 46db432672SRichard Henderson } 47db432672SRichard Henderson clear_high(d, oprsz, desc); 48db432672SRichard Henderson } 49db432672SRichard Henderson 50db432672SRichard Henderson void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc) 51db432672SRichard Henderson { 52db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 53db432672SRichard Henderson intptr_t i; 54db432672SRichard Henderson 556c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 566c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 57db432672SRichard Henderson } 58db432672SRichard Henderson clear_high(d, oprsz, desc); 59db432672SRichard Henderson } 60db432672SRichard Henderson 61db432672SRichard Henderson void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc) 62db432672SRichard Henderson { 63db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 64db432672SRichard Henderson intptr_t i; 65db432672SRichard Henderson 666c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 676c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i); 68db432672SRichard Henderson } 69db432672SRichard Henderson clear_high(d, oprsz, desc); 70db432672SRichard Henderson } 71db432672SRichard Henderson 72db432672SRichard Henderson void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc) 73db432672SRichard Henderson { 74db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 75db432672SRichard Henderson intptr_t i; 76db432672SRichard Henderson 776c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 786c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i); 79db432672SRichard Henderson } 80db432672SRichard Henderson clear_high(d, oprsz, desc); 81db432672SRichard Henderson } 82db432672SRichard Henderson 8322fc3527SRichard Henderson void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc) 8422fc3527SRichard Henderson { 8522fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8622fc3527SRichard Henderson intptr_t i; 8722fc3527SRichard Henderson 886c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 89*0a83e43aSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b; 9022fc3527SRichard Henderson } 9122fc3527SRichard Henderson clear_high(d, oprsz, desc); 9222fc3527SRichard Henderson } 9322fc3527SRichard Henderson 9422fc3527SRichard Henderson void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc) 9522fc3527SRichard Henderson { 9622fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 9722fc3527SRichard Henderson intptr_t i; 9822fc3527SRichard Henderson 996c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 100*0a83e43aSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b; 10122fc3527SRichard Henderson } 10222fc3527SRichard Henderson clear_high(d, oprsz, desc); 10322fc3527SRichard Henderson } 10422fc3527SRichard Henderson 10522fc3527SRichard Henderson void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc) 10622fc3527SRichard Henderson { 10722fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 10822fc3527SRichard Henderson intptr_t i; 10922fc3527SRichard Henderson 1106c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 111*0a83e43aSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b; 11222fc3527SRichard Henderson } 11322fc3527SRichard Henderson clear_high(d, oprsz, desc); 11422fc3527SRichard Henderson } 11522fc3527SRichard Henderson 11622fc3527SRichard Henderson void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc) 11722fc3527SRichard Henderson { 11822fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 11922fc3527SRichard Henderson intptr_t i; 12022fc3527SRichard Henderson 1216c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 122*0a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b; 12322fc3527SRichard Henderson } 12422fc3527SRichard Henderson clear_high(d, oprsz, desc); 12522fc3527SRichard Henderson } 12622fc3527SRichard Henderson 127db432672SRichard Henderson void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc) 128db432672SRichard Henderson { 129db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 130db432672SRichard Henderson intptr_t i; 131db432672SRichard Henderson 1326c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1336c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 134db432672SRichard Henderson } 135db432672SRichard Henderson clear_high(d, oprsz, desc); 136db432672SRichard Henderson } 137db432672SRichard Henderson 138db432672SRichard Henderson void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc) 139db432672SRichard Henderson { 140db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 141db432672SRichard Henderson intptr_t i; 142db432672SRichard Henderson 1436c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1446c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 145db432672SRichard Henderson } 146db432672SRichard Henderson clear_high(d, oprsz, desc); 147db432672SRichard Henderson } 148db432672SRichard Henderson 149db432672SRichard Henderson void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc) 150db432672SRichard Henderson { 151db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 152db432672SRichard Henderson intptr_t i; 153db432672SRichard Henderson 1546c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1556c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i); 156db432672SRichard Henderson } 157db432672SRichard Henderson clear_high(d, oprsz, desc); 158db432672SRichard Henderson } 159db432672SRichard Henderson 160db432672SRichard Henderson void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc) 161db432672SRichard Henderson { 162db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 163db432672SRichard Henderson intptr_t i; 164db432672SRichard Henderson 1656c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1666c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i); 167db432672SRichard Henderson } 168db432672SRichard Henderson clear_high(d, oprsz, desc); 169db432672SRichard Henderson } 170db432672SRichard Henderson 17122fc3527SRichard Henderson void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc) 17222fc3527SRichard Henderson { 17322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 17422fc3527SRichard Henderson intptr_t i; 17522fc3527SRichard Henderson 1766c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 177*0a83e43aSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b; 17822fc3527SRichard Henderson } 17922fc3527SRichard Henderson clear_high(d, oprsz, desc); 18022fc3527SRichard Henderson } 18122fc3527SRichard Henderson 18222fc3527SRichard Henderson void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc) 18322fc3527SRichard Henderson { 18422fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 18522fc3527SRichard Henderson intptr_t i; 18622fc3527SRichard Henderson 1876c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 188*0a83e43aSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b; 18922fc3527SRichard Henderson } 19022fc3527SRichard Henderson clear_high(d, oprsz, desc); 19122fc3527SRichard Henderson } 19222fc3527SRichard Henderson 19322fc3527SRichard Henderson void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc) 19422fc3527SRichard Henderson { 19522fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 19622fc3527SRichard Henderson intptr_t i; 19722fc3527SRichard Henderson 1986c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 199*0a83e43aSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b; 20022fc3527SRichard Henderson } 20122fc3527SRichard Henderson clear_high(d, oprsz, desc); 20222fc3527SRichard Henderson } 20322fc3527SRichard Henderson 20422fc3527SRichard Henderson void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc) 20522fc3527SRichard Henderson { 20622fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 20722fc3527SRichard Henderson intptr_t i; 20822fc3527SRichard Henderson 2096c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 210*0a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b; 21122fc3527SRichard Henderson } 21222fc3527SRichard Henderson clear_high(d, oprsz, desc); 21322fc3527SRichard Henderson } 21422fc3527SRichard Henderson 2153774030aSRichard Henderson void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc) 2163774030aSRichard Henderson { 2173774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2183774030aSRichard Henderson intptr_t i; 2193774030aSRichard Henderson 2206c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 2216c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i); 2223774030aSRichard Henderson } 2233774030aSRichard Henderson clear_high(d, oprsz, desc); 2243774030aSRichard Henderson } 2253774030aSRichard Henderson 2263774030aSRichard Henderson void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc) 2273774030aSRichard Henderson { 2283774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2293774030aSRichard Henderson intptr_t i; 2303774030aSRichard Henderson 2316c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 2326c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i); 2333774030aSRichard Henderson } 2343774030aSRichard Henderson clear_high(d, oprsz, desc); 2353774030aSRichard Henderson } 2363774030aSRichard Henderson 2373774030aSRichard Henderson void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc) 2383774030aSRichard Henderson { 2393774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2403774030aSRichard Henderson intptr_t i; 2413774030aSRichard Henderson 2426c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 2436c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i); 2443774030aSRichard Henderson } 2453774030aSRichard Henderson clear_high(d, oprsz, desc); 2463774030aSRichard Henderson } 2473774030aSRichard Henderson 2483774030aSRichard Henderson void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc) 2493774030aSRichard Henderson { 2503774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2513774030aSRichard Henderson intptr_t i; 2523774030aSRichard Henderson 2536c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 2546c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i); 2553774030aSRichard Henderson } 2563774030aSRichard Henderson clear_high(d, oprsz, desc); 2573774030aSRichard Henderson } 2583774030aSRichard Henderson 25922fc3527SRichard Henderson void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc) 26022fc3527SRichard Henderson { 26122fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 26222fc3527SRichard Henderson intptr_t i; 26322fc3527SRichard Henderson 2646c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 265*0a83e43aSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b; 26622fc3527SRichard Henderson } 26722fc3527SRichard Henderson clear_high(d, oprsz, desc); 26822fc3527SRichard Henderson } 26922fc3527SRichard Henderson 27022fc3527SRichard Henderson void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc) 27122fc3527SRichard Henderson { 27222fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 27322fc3527SRichard Henderson intptr_t i; 27422fc3527SRichard Henderson 2756c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 276*0a83e43aSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b; 27722fc3527SRichard Henderson } 27822fc3527SRichard Henderson clear_high(d, oprsz, desc); 27922fc3527SRichard Henderson } 28022fc3527SRichard Henderson 28122fc3527SRichard Henderson void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc) 28222fc3527SRichard Henderson { 28322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 28422fc3527SRichard Henderson intptr_t i; 28522fc3527SRichard Henderson 2866c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 287*0a83e43aSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b; 28822fc3527SRichard Henderson } 28922fc3527SRichard Henderson clear_high(d, oprsz, desc); 29022fc3527SRichard Henderson } 29122fc3527SRichard Henderson 29222fc3527SRichard Henderson void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc) 29322fc3527SRichard Henderson { 29422fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 29522fc3527SRichard Henderson intptr_t i; 29622fc3527SRichard Henderson 2976c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 298*0a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b; 29922fc3527SRichard Henderson } 30022fc3527SRichard Henderson clear_high(d, oprsz, desc); 30122fc3527SRichard Henderson } 30222fc3527SRichard Henderson 303db432672SRichard Henderson void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) 304db432672SRichard Henderson { 305db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 306db432672SRichard Henderson intptr_t i; 307db432672SRichard Henderson 3086c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 3096c7ab301SRichard Henderson *(uint8_t *)(d + i) = -*(uint8_t *)(a + i); 310db432672SRichard Henderson } 311db432672SRichard Henderson clear_high(d, oprsz, desc); 312db432672SRichard Henderson } 313db432672SRichard Henderson 314db432672SRichard Henderson void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) 315db432672SRichard Henderson { 316db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 317db432672SRichard Henderson intptr_t i; 318db432672SRichard Henderson 3196c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 3206c7ab301SRichard Henderson *(uint16_t *)(d + i) = -*(uint16_t *)(a + i); 321db432672SRichard Henderson } 322db432672SRichard Henderson clear_high(d, oprsz, desc); 323db432672SRichard Henderson } 324db432672SRichard Henderson 325db432672SRichard Henderson void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) 326db432672SRichard Henderson { 327db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 328db432672SRichard Henderson intptr_t i; 329db432672SRichard Henderson 3306c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 3316c7ab301SRichard Henderson *(uint32_t *)(d + i) = -*(uint32_t *)(a + i); 332db432672SRichard Henderson } 333db432672SRichard Henderson clear_high(d, oprsz, desc); 334db432672SRichard Henderson } 335db432672SRichard Henderson 336db432672SRichard Henderson void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) 337db432672SRichard Henderson { 338db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 339db432672SRichard Henderson intptr_t i; 340db432672SRichard Henderson 3416c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 3426c7ab301SRichard Henderson *(uint64_t *)(d + i) = -*(uint64_t *)(a + i); 343db432672SRichard Henderson } 344db432672SRichard Henderson clear_high(d, oprsz, desc); 345db432672SRichard Henderson } 346db432672SRichard Henderson 347bcefc902SRichard Henderson void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc) 348bcefc902SRichard Henderson { 349bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 350bcefc902SRichard Henderson intptr_t i; 351bcefc902SRichard Henderson 352bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 353bcefc902SRichard Henderson int8_t aa = *(int8_t *)(a + i); 354bcefc902SRichard Henderson *(int8_t *)(d + i) = aa < 0 ? -aa : aa; 355bcefc902SRichard Henderson } 356bcefc902SRichard Henderson clear_high(d, oprsz, desc); 357bcefc902SRichard Henderson } 358bcefc902SRichard Henderson 359bcefc902SRichard Henderson void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc) 360bcefc902SRichard Henderson { 361bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 362bcefc902SRichard Henderson intptr_t i; 363bcefc902SRichard Henderson 364bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 365bcefc902SRichard Henderson int16_t aa = *(int16_t *)(a + i); 366bcefc902SRichard Henderson *(int16_t *)(d + i) = aa < 0 ? -aa : aa; 367bcefc902SRichard Henderson } 368bcefc902SRichard Henderson clear_high(d, oprsz, desc); 369bcefc902SRichard Henderson } 370bcefc902SRichard Henderson 371bcefc902SRichard Henderson void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc) 372bcefc902SRichard Henderson { 373bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 374bcefc902SRichard Henderson intptr_t i; 375bcefc902SRichard Henderson 376bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 377bcefc902SRichard Henderson int32_t aa = *(int32_t *)(a + i); 378bcefc902SRichard Henderson *(int32_t *)(d + i) = aa < 0 ? -aa : aa; 379bcefc902SRichard Henderson } 380bcefc902SRichard Henderson clear_high(d, oprsz, desc); 381bcefc902SRichard Henderson } 382bcefc902SRichard Henderson 383bcefc902SRichard Henderson void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc) 384bcefc902SRichard Henderson { 385bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 386bcefc902SRichard Henderson intptr_t i; 387bcefc902SRichard Henderson 388bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 389bcefc902SRichard Henderson int64_t aa = *(int64_t *)(a + i); 390bcefc902SRichard Henderson *(int64_t *)(d + i) = aa < 0 ? -aa : aa; 391bcefc902SRichard Henderson } 392bcefc902SRichard Henderson clear_high(d, oprsz, desc); 393bcefc902SRichard Henderson } 394bcefc902SRichard Henderson 395db432672SRichard Henderson void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) 396db432672SRichard Henderson { 397db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 398db432672SRichard Henderson 399db432672SRichard Henderson memcpy(d, a, oprsz); 400db432672SRichard Henderson clear_high(d, oprsz, desc); 401db432672SRichard Henderson } 402db432672SRichard Henderson 403db432672SRichard Henderson void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) 404db432672SRichard Henderson { 405db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 406db432672SRichard Henderson intptr_t i; 407db432672SRichard Henderson 408db432672SRichard Henderson if (c == 0) { 409db432672SRichard Henderson oprsz = 0; 410db432672SRichard Henderson } else { 411db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 412db432672SRichard Henderson *(uint64_t *)(d + i) = c; 413db432672SRichard Henderson } 414db432672SRichard Henderson } 415db432672SRichard Henderson clear_high(d, oprsz, desc); 416db432672SRichard Henderson } 417db432672SRichard Henderson 418db432672SRichard Henderson void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) 419db432672SRichard Henderson { 420db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 421db432672SRichard Henderson intptr_t i; 422db432672SRichard Henderson 423db432672SRichard Henderson if (c == 0) { 424db432672SRichard Henderson oprsz = 0; 425db432672SRichard Henderson } else { 426db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 427db432672SRichard Henderson *(uint32_t *)(d + i) = c; 428db432672SRichard Henderson } 429db432672SRichard Henderson } 430db432672SRichard Henderson clear_high(d, oprsz, desc); 431db432672SRichard Henderson } 432db432672SRichard Henderson 433db432672SRichard Henderson void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) 434db432672SRichard Henderson { 435db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); 436db432672SRichard Henderson } 437db432672SRichard Henderson 438db432672SRichard Henderson void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) 439db432672SRichard Henderson { 440db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); 441db432672SRichard Henderson } 442db432672SRichard Henderson 443db432672SRichard Henderson void HELPER(gvec_not)(void *d, void *a, uint32_t desc) 444db432672SRichard Henderson { 445db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 446db432672SRichard Henderson intptr_t i; 447db432672SRichard Henderson 4486c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 4496c7ab301SRichard Henderson *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i); 450db432672SRichard Henderson } 451db432672SRichard Henderson clear_high(d, oprsz, desc); 452db432672SRichard Henderson } 453db432672SRichard Henderson 454db432672SRichard Henderson void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) 455db432672SRichard Henderson { 456db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 457db432672SRichard Henderson intptr_t i; 458db432672SRichard Henderson 4596c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 4606c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i); 461db432672SRichard Henderson } 462db432672SRichard Henderson clear_high(d, oprsz, desc); 463db432672SRichard Henderson } 464db432672SRichard Henderson 465db432672SRichard Henderson void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) 466db432672SRichard Henderson { 467db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 468db432672SRichard Henderson intptr_t i; 469db432672SRichard Henderson 4706c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 4716c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i); 472db432672SRichard Henderson } 473db432672SRichard Henderson clear_high(d, oprsz, desc); 474db432672SRichard Henderson } 475db432672SRichard Henderson 476db432672SRichard Henderson void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) 477db432672SRichard Henderson { 478db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 479db432672SRichard Henderson intptr_t i; 480db432672SRichard Henderson 4816c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 4826c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i); 483db432672SRichard Henderson } 484db432672SRichard Henderson clear_high(d, oprsz, desc); 485db432672SRichard Henderson } 486db432672SRichard Henderson 487db432672SRichard Henderson void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) 488db432672SRichard Henderson { 489db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 490db432672SRichard Henderson intptr_t i; 491db432672SRichard Henderson 4926c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 4936c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i); 494db432672SRichard Henderson } 495db432672SRichard Henderson clear_high(d, oprsz, desc); 496db432672SRichard Henderson } 497db432672SRichard Henderson 498db432672SRichard Henderson void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) 499db432672SRichard Henderson { 500db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 501db432672SRichard Henderson intptr_t i; 502db432672SRichard Henderson 5036c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 5046c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i); 505db432672SRichard Henderson } 506db432672SRichard Henderson clear_high(d, oprsz, desc); 507db432672SRichard Henderson } 508d0ec9796SRichard Henderson 509f550805dSRichard Henderson void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc) 510f550805dSRichard Henderson { 511f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 512f550805dSRichard Henderson intptr_t i; 513f550805dSRichard Henderson 5146c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 5156c7ab301SRichard Henderson *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i)); 516f550805dSRichard Henderson } 517f550805dSRichard Henderson clear_high(d, oprsz, desc); 518f550805dSRichard Henderson } 519f550805dSRichard Henderson 520f550805dSRichard Henderson void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc) 521f550805dSRichard Henderson { 522f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 523f550805dSRichard Henderson intptr_t i; 524f550805dSRichard Henderson 5256c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 5266c7ab301SRichard Henderson *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i)); 527f550805dSRichard Henderson } 528f550805dSRichard Henderson clear_high(d, oprsz, desc); 529f550805dSRichard Henderson } 530f550805dSRichard Henderson 531f550805dSRichard Henderson void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc) 532f550805dSRichard Henderson { 533f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 534f550805dSRichard Henderson intptr_t i; 535f550805dSRichard Henderson 5366c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 5376c7ab301SRichard Henderson *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i)); 538f550805dSRichard Henderson } 539f550805dSRichard Henderson clear_high(d, oprsz, desc); 540f550805dSRichard Henderson } 541f550805dSRichard Henderson 54222fc3527SRichard Henderson void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc) 54322fc3527SRichard Henderson { 54422fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 54522fc3527SRichard Henderson intptr_t i; 54622fc3527SRichard Henderson 5476c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 548*0a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b; 54922fc3527SRichard Henderson } 55022fc3527SRichard Henderson clear_high(d, oprsz, desc); 55122fc3527SRichard Henderson } 55222fc3527SRichard Henderson 55322fc3527SRichard Henderson void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc) 55422fc3527SRichard Henderson { 55522fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 55622fc3527SRichard Henderson intptr_t i; 55722fc3527SRichard Henderson 5586c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 559*0a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b; 56022fc3527SRichard Henderson } 56122fc3527SRichard Henderson clear_high(d, oprsz, desc); 56222fc3527SRichard Henderson } 56322fc3527SRichard Henderson 56422fc3527SRichard Henderson void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc) 56522fc3527SRichard Henderson { 56622fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 56722fc3527SRichard Henderson intptr_t i; 56822fc3527SRichard Henderson 5696c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 570*0a83e43aSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b; 57122fc3527SRichard Henderson } 57222fc3527SRichard Henderson clear_high(d, oprsz, desc); 57322fc3527SRichard Henderson } 57422fc3527SRichard Henderson 575d0ec9796SRichard Henderson void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) 576d0ec9796SRichard Henderson { 577d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 578d0ec9796SRichard Henderson int shift = simd_data(desc); 579d0ec9796SRichard Henderson intptr_t i; 580d0ec9796SRichard Henderson 5816c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 5826c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift; 583d0ec9796SRichard Henderson } 584d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 585d0ec9796SRichard Henderson } 586d0ec9796SRichard Henderson 587d0ec9796SRichard Henderson void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) 588d0ec9796SRichard Henderson { 589d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 590d0ec9796SRichard Henderson int shift = simd_data(desc); 591d0ec9796SRichard Henderson intptr_t i; 592d0ec9796SRichard Henderson 5936c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 5946c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift; 595d0ec9796SRichard Henderson } 596d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 597d0ec9796SRichard Henderson } 598d0ec9796SRichard Henderson 599d0ec9796SRichard Henderson void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) 600d0ec9796SRichard Henderson { 601d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 602d0ec9796SRichard Henderson int shift = simd_data(desc); 603d0ec9796SRichard Henderson intptr_t i; 604d0ec9796SRichard Henderson 6056c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 6066c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift; 607d0ec9796SRichard Henderson } 608d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 609d0ec9796SRichard Henderson } 610d0ec9796SRichard Henderson 611d0ec9796SRichard Henderson void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) 612d0ec9796SRichard Henderson { 613d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 614d0ec9796SRichard Henderson int shift = simd_data(desc); 615d0ec9796SRichard Henderson intptr_t i; 616d0ec9796SRichard Henderson 6176c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 6186c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift; 619d0ec9796SRichard Henderson } 620d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 621d0ec9796SRichard Henderson } 622d0ec9796SRichard Henderson 623d0ec9796SRichard Henderson void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc) 624d0ec9796SRichard Henderson { 625d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 626d0ec9796SRichard Henderson int shift = simd_data(desc); 627d0ec9796SRichard Henderson intptr_t i; 628d0ec9796SRichard Henderson 6296c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 6306c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift; 631d0ec9796SRichard Henderson } 632d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 633d0ec9796SRichard Henderson } 634d0ec9796SRichard Henderson 635d0ec9796SRichard Henderson void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) 636d0ec9796SRichard Henderson { 637d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 638d0ec9796SRichard Henderson int shift = simd_data(desc); 639d0ec9796SRichard Henderson intptr_t i; 640d0ec9796SRichard Henderson 6416c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 6426c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift; 643d0ec9796SRichard Henderson } 644d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 645d0ec9796SRichard Henderson } 646d0ec9796SRichard Henderson 647d0ec9796SRichard Henderson void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) 648d0ec9796SRichard Henderson { 649d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 650d0ec9796SRichard Henderson int shift = simd_data(desc); 651d0ec9796SRichard Henderson intptr_t i; 652d0ec9796SRichard Henderson 6536c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 6546c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift; 655d0ec9796SRichard Henderson } 656d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 657d0ec9796SRichard Henderson } 658d0ec9796SRichard Henderson 659d0ec9796SRichard Henderson void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) 660d0ec9796SRichard Henderson { 661d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 662d0ec9796SRichard Henderson int shift = simd_data(desc); 663d0ec9796SRichard Henderson intptr_t i; 664d0ec9796SRichard Henderson 6656c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 6666c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift; 667d0ec9796SRichard Henderson } 668d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 669d0ec9796SRichard Henderson } 670d0ec9796SRichard Henderson 671d0ec9796SRichard Henderson void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) 672d0ec9796SRichard Henderson { 673d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 674d0ec9796SRichard Henderson int shift = simd_data(desc); 675d0ec9796SRichard Henderson intptr_t i; 676d0ec9796SRichard Henderson 6776c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 6786c7ab301SRichard Henderson *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift; 679d0ec9796SRichard Henderson } 680d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 681d0ec9796SRichard Henderson } 682d0ec9796SRichard Henderson 683d0ec9796SRichard Henderson void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) 684d0ec9796SRichard Henderson { 685d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 686d0ec9796SRichard Henderson int shift = simd_data(desc); 687d0ec9796SRichard Henderson intptr_t i; 688d0ec9796SRichard Henderson 6896c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 6906c7ab301SRichard Henderson *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift; 691d0ec9796SRichard Henderson } 692d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 693d0ec9796SRichard Henderson } 694d0ec9796SRichard Henderson 695d0ec9796SRichard Henderson void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) 696d0ec9796SRichard Henderson { 697d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 698d0ec9796SRichard Henderson int shift = simd_data(desc); 699d0ec9796SRichard Henderson intptr_t i; 700d0ec9796SRichard Henderson 7016c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 7026c7ab301SRichard Henderson *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift; 703d0ec9796SRichard Henderson } 704d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 705d0ec9796SRichard Henderson } 706d0ec9796SRichard Henderson 707d0ec9796SRichard Henderson void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) 708d0ec9796SRichard Henderson { 709d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 710d0ec9796SRichard Henderson int shift = simd_data(desc); 711d0ec9796SRichard Henderson intptr_t i; 712d0ec9796SRichard Henderson 7136c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 7146c7ab301SRichard Henderson *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift; 715d0ec9796SRichard Henderson } 716d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 717d0ec9796SRichard Henderson } 718212be173SRichard Henderson 7195ee5c14cSRichard Henderson void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) 7205ee5c14cSRichard Henderson { 7215ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 7225ee5c14cSRichard Henderson intptr_t i; 7235ee5c14cSRichard Henderson 7245ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 7255ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 7265ee5c14cSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh; 7275ee5c14cSRichard Henderson } 7285ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 7295ee5c14cSRichard Henderson } 7305ee5c14cSRichard Henderson 7315ee5c14cSRichard Henderson void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc) 7325ee5c14cSRichard Henderson { 7335ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 7345ee5c14cSRichard Henderson intptr_t i; 7355ee5c14cSRichard Henderson 7365ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 7375ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 7385ee5c14cSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh; 7395ee5c14cSRichard Henderson } 7405ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 7415ee5c14cSRichard Henderson } 7425ee5c14cSRichard Henderson 7435ee5c14cSRichard Henderson void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc) 7445ee5c14cSRichard Henderson { 7455ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 7465ee5c14cSRichard Henderson intptr_t i; 7475ee5c14cSRichard Henderson 7485ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 7495ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 7505ee5c14cSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh; 7515ee5c14cSRichard Henderson } 7525ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 7535ee5c14cSRichard Henderson } 7545ee5c14cSRichard Henderson 7555ee5c14cSRichard Henderson void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc) 7565ee5c14cSRichard Henderson { 7575ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 7585ee5c14cSRichard Henderson intptr_t i; 7595ee5c14cSRichard Henderson 7605ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 7615ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 7625ee5c14cSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh; 7635ee5c14cSRichard Henderson } 7645ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 7655ee5c14cSRichard Henderson } 7665ee5c14cSRichard Henderson 7675ee5c14cSRichard Henderson void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc) 7685ee5c14cSRichard Henderson { 7695ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 7705ee5c14cSRichard Henderson intptr_t i; 7715ee5c14cSRichard Henderson 7725ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 7735ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 7745ee5c14cSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh; 7755ee5c14cSRichard Henderson } 7765ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 7775ee5c14cSRichard Henderson } 7785ee5c14cSRichard Henderson 7795ee5c14cSRichard Henderson void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc) 7805ee5c14cSRichard Henderson { 7815ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 7825ee5c14cSRichard Henderson intptr_t i; 7835ee5c14cSRichard Henderson 7845ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 7855ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 7865ee5c14cSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh; 7875ee5c14cSRichard Henderson } 7885ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 7895ee5c14cSRichard Henderson } 7905ee5c14cSRichard Henderson 7915ee5c14cSRichard Henderson void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc) 7925ee5c14cSRichard Henderson { 7935ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 7945ee5c14cSRichard Henderson intptr_t i; 7955ee5c14cSRichard Henderson 7965ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 7975ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 7985ee5c14cSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh; 7995ee5c14cSRichard Henderson } 8005ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8015ee5c14cSRichard Henderson } 8025ee5c14cSRichard Henderson 8035ee5c14cSRichard Henderson void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc) 8045ee5c14cSRichard Henderson { 8055ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8065ee5c14cSRichard Henderson intptr_t i; 8075ee5c14cSRichard Henderson 8085ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 8095ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 8105ee5c14cSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh; 8115ee5c14cSRichard Henderson } 8125ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8135ee5c14cSRichard Henderson } 8145ee5c14cSRichard Henderson 8155ee5c14cSRichard Henderson void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc) 8165ee5c14cSRichard Henderson { 8175ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8185ee5c14cSRichard Henderson intptr_t i; 8195ee5c14cSRichard Henderson 820899f08adSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 8215ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 8225ee5c14cSRichard Henderson *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh; 8235ee5c14cSRichard Henderson } 8245ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8255ee5c14cSRichard Henderson } 8265ee5c14cSRichard Henderson 8275ee5c14cSRichard Henderson void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc) 8285ee5c14cSRichard Henderson { 8295ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8305ee5c14cSRichard Henderson intptr_t i; 8315ee5c14cSRichard Henderson 8325ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 8335ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 8345ee5c14cSRichard Henderson *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh; 8355ee5c14cSRichard Henderson } 8365ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8375ee5c14cSRichard Henderson } 8385ee5c14cSRichard Henderson 8395ee5c14cSRichard Henderson void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc) 8405ee5c14cSRichard Henderson { 8415ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8425ee5c14cSRichard Henderson intptr_t i; 8435ee5c14cSRichard Henderson 844899f08adSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 8455ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 8465ee5c14cSRichard Henderson *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh; 8475ee5c14cSRichard Henderson } 8485ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8495ee5c14cSRichard Henderson } 8505ee5c14cSRichard Henderson 8515ee5c14cSRichard Henderson void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) 8525ee5c14cSRichard Henderson { 8535ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8545ee5c14cSRichard Henderson intptr_t i; 8555ee5c14cSRichard Henderson 856899f08adSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 8575ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 8585ee5c14cSRichard Henderson *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh; 8595ee5c14cSRichard Henderson } 8605ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8615ee5c14cSRichard Henderson } 8625ee5c14cSRichard Henderson 863212be173SRichard Henderson #define DO_CMP0(X) -(X) 864212be173SRichard Henderson 865212be173SRichard Henderson #define DO_CMP1(NAME, TYPE, OP) \ 866212be173SRichard Henderson void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ 867212be173SRichard Henderson { \ 868212be173SRichard Henderson intptr_t oprsz = simd_oprsz(desc); \ 869212be173SRichard Henderson intptr_t i; \ 8706cb1d3b8SRichard Henderson for (i = 0; i < oprsz; i += sizeof(TYPE)) { \ 871212be173SRichard Henderson *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \ 872212be173SRichard Henderson } \ 873212be173SRichard Henderson clear_high(d, oprsz, desc); \ 874212be173SRichard Henderson } 875212be173SRichard Henderson 876212be173SRichard Henderson #define DO_CMP2(SZ) \ 8776c7ab301SRichard Henderson DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \ 8786c7ab301SRichard Henderson DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \ 8796c7ab301SRichard Henderson DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \ 8806c7ab301SRichard Henderson DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \ 8816c7ab301SRichard Henderson DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \ 8826c7ab301SRichard Henderson DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=) 883212be173SRichard Henderson 884212be173SRichard Henderson DO_CMP2(8) 885212be173SRichard Henderson DO_CMP2(16) 886212be173SRichard Henderson DO_CMP2(32) 887212be173SRichard Henderson DO_CMP2(64) 888212be173SRichard Henderson 889212be173SRichard Henderson #undef DO_CMP0 890212be173SRichard Henderson #undef DO_CMP1 891212be173SRichard Henderson #undef DO_CMP2 892f49b12c6SRichard Henderson 893f49b12c6SRichard Henderson void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc) 894f49b12c6SRichard Henderson { 895f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 896f49b12c6SRichard Henderson intptr_t i; 897f49b12c6SRichard Henderson 898f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 899f49b12c6SRichard Henderson int r = *(int8_t *)(a + i) + *(int8_t *)(b + i); 900f49b12c6SRichard Henderson if (r > INT8_MAX) { 901f49b12c6SRichard Henderson r = INT8_MAX; 902f49b12c6SRichard Henderson } else if (r < INT8_MIN) { 903f49b12c6SRichard Henderson r = INT8_MIN; 904f49b12c6SRichard Henderson } 905f49b12c6SRichard Henderson *(int8_t *)(d + i) = r; 906f49b12c6SRichard Henderson } 907f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 908f49b12c6SRichard Henderson } 909f49b12c6SRichard Henderson 910f49b12c6SRichard Henderson void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc) 911f49b12c6SRichard Henderson { 912f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 913f49b12c6SRichard Henderson intptr_t i; 914f49b12c6SRichard Henderson 915f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 916f49b12c6SRichard Henderson int r = *(int16_t *)(a + i) + *(int16_t *)(b + i); 917f49b12c6SRichard Henderson if (r > INT16_MAX) { 918f49b12c6SRichard Henderson r = INT16_MAX; 919f49b12c6SRichard Henderson } else if (r < INT16_MIN) { 920f49b12c6SRichard Henderson r = INT16_MIN; 921f49b12c6SRichard Henderson } 922f49b12c6SRichard Henderson *(int16_t *)(d + i) = r; 923f49b12c6SRichard Henderson } 924f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 925f49b12c6SRichard Henderson } 926f49b12c6SRichard Henderson 927f49b12c6SRichard Henderson void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc) 928f49b12c6SRichard Henderson { 929f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 930f49b12c6SRichard Henderson intptr_t i; 931f49b12c6SRichard Henderson 932f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 933f49b12c6SRichard Henderson int32_t ai = *(int32_t *)(a + i); 934f49b12c6SRichard Henderson int32_t bi = *(int32_t *)(b + i); 935f49b12c6SRichard Henderson int32_t di = ai + bi; 936f49b12c6SRichard Henderson if (((di ^ ai) &~ (ai ^ bi)) < 0) { 937f49b12c6SRichard Henderson /* Signed overflow. */ 938f49b12c6SRichard Henderson di = (di < 0 ? INT32_MAX : INT32_MIN); 939f49b12c6SRichard Henderson } 940f49b12c6SRichard Henderson *(int32_t *)(d + i) = di; 941f49b12c6SRichard Henderson } 942f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 943f49b12c6SRichard Henderson } 944f49b12c6SRichard Henderson 945f49b12c6SRichard Henderson void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc) 946f49b12c6SRichard Henderson { 947f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 948f49b12c6SRichard Henderson intptr_t i; 949f49b12c6SRichard Henderson 950f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 951f49b12c6SRichard Henderson int64_t ai = *(int64_t *)(a + i); 952f49b12c6SRichard Henderson int64_t bi = *(int64_t *)(b + i); 953f49b12c6SRichard Henderson int64_t di = ai + bi; 954f49b12c6SRichard Henderson if (((di ^ ai) &~ (ai ^ bi)) < 0) { 955f49b12c6SRichard Henderson /* Signed overflow. */ 956f49b12c6SRichard Henderson di = (di < 0 ? INT64_MAX : INT64_MIN); 957f49b12c6SRichard Henderson } 958f49b12c6SRichard Henderson *(int64_t *)(d + i) = di; 959f49b12c6SRichard Henderson } 960f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 961f49b12c6SRichard Henderson } 962f49b12c6SRichard Henderson 963f49b12c6SRichard Henderson void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc) 964f49b12c6SRichard Henderson { 965f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 966f49b12c6SRichard Henderson intptr_t i; 967f49b12c6SRichard Henderson 968f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 969f49b12c6SRichard Henderson int r = *(int8_t *)(a + i) - *(int8_t *)(b + i); 970f49b12c6SRichard Henderson if (r > INT8_MAX) { 971f49b12c6SRichard Henderson r = INT8_MAX; 972f49b12c6SRichard Henderson } else if (r < INT8_MIN) { 973f49b12c6SRichard Henderson r = INT8_MIN; 974f49b12c6SRichard Henderson } 975f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 976f49b12c6SRichard Henderson } 977f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 978f49b12c6SRichard Henderson } 979f49b12c6SRichard Henderson 980f49b12c6SRichard Henderson void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc) 981f49b12c6SRichard Henderson { 982f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 983f49b12c6SRichard Henderson intptr_t i; 984f49b12c6SRichard Henderson 985f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 986f49b12c6SRichard Henderson int r = *(int16_t *)(a + i) - *(int16_t *)(b + i); 987f49b12c6SRichard Henderson if (r > INT16_MAX) { 988f49b12c6SRichard Henderson r = INT16_MAX; 989f49b12c6SRichard Henderson } else if (r < INT16_MIN) { 990f49b12c6SRichard Henderson r = INT16_MIN; 991f49b12c6SRichard Henderson } 992f49b12c6SRichard Henderson *(int16_t *)(d + i) = r; 993f49b12c6SRichard Henderson } 994f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 995f49b12c6SRichard Henderson } 996f49b12c6SRichard Henderson 997f49b12c6SRichard Henderson void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc) 998f49b12c6SRichard Henderson { 999f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1000f49b12c6SRichard Henderson intptr_t i; 1001f49b12c6SRichard Henderson 1002f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1003f49b12c6SRichard Henderson int32_t ai = *(int32_t *)(a + i); 1004f49b12c6SRichard Henderson int32_t bi = *(int32_t *)(b + i); 1005f49b12c6SRichard Henderson int32_t di = ai - bi; 1006f49b12c6SRichard Henderson if (((di ^ ai) & (ai ^ bi)) < 0) { 1007f49b12c6SRichard Henderson /* Signed overflow. */ 1008f49b12c6SRichard Henderson di = (di < 0 ? INT32_MAX : INT32_MIN); 1009f49b12c6SRichard Henderson } 1010f49b12c6SRichard Henderson *(int32_t *)(d + i) = di; 1011f49b12c6SRichard Henderson } 1012f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1013f49b12c6SRichard Henderson } 1014f49b12c6SRichard Henderson 1015f49b12c6SRichard Henderson void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc) 1016f49b12c6SRichard Henderson { 1017f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1018f49b12c6SRichard Henderson intptr_t i; 1019f49b12c6SRichard Henderson 1020f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1021f49b12c6SRichard Henderson int64_t ai = *(int64_t *)(a + i); 1022f49b12c6SRichard Henderson int64_t bi = *(int64_t *)(b + i); 1023f49b12c6SRichard Henderson int64_t di = ai - bi; 1024f49b12c6SRichard Henderson if (((di ^ ai) & (ai ^ bi)) < 0) { 1025f49b12c6SRichard Henderson /* Signed overflow. */ 1026f49b12c6SRichard Henderson di = (di < 0 ? INT64_MAX : INT64_MIN); 1027f49b12c6SRichard Henderson } 1028f49b12c6SRichard Henderson *(int64_t *)(d + i) = di; 1029f49b12c6SRichard Henderson } 1030f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1031f49b12c6SRichard Henderson } 1032f49b12c6SRichard Henderson 1033f49b12c6SRichard Henderson void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc) 1034f49b12c6SRichard Henderson { 1035f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1036f49b12c6SRichard Henderson intptr_t i; 1037f49b12c6SRichard Henderson 1038f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1039f49b12c6SRichard Henderson unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 1040f49b12c6SRichard Henderson if (r > UINT8_MAX) { 1041f49b12c6SRichard Henderson r = UINT8_MAX; 1042f49b12c6SRichard Henderson } 1043f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 1044f49b12c6SRichard Henderson } 1045f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1046f49b12c6SRichard Henderson } 1047f49b12c6SRichard Henderson 1048f49b12c6SRichard Henderson void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc) 1049f49b12c6SRichard Henderson { 1050f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1051f49b12c6SRichard Henderson intptr_t i; 1052f49b12c6SRichard Henderson 1053f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1054f49b12c6SRichard Henderson unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 1055f49b12c6SRichard Henderson if (r > UINT16_MAX) { 1056f49b12c6SRichard Henderson r = UINT16_MAX; 1057f49b12c6SRichard Henderson } 1058f49b12c6SRichard Henderson *(uint16_t *)(d + i) = r; 1059f49b12c6SRichard Henderson } 1060f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1061f49b12c6SRichard Henderson } 1062f49b12c6SRichard Henderson 1063f49b12c6SRichard Henderson void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc) 1064f49b12c6SRichard Henderson { 1065f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1066f49b12c6SRichard Henderson intptr_t i; 1067f49b12c6SRichard Henderson 1068f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1069f49b12c6SRichard Henderson uint32_t ai = *(uint32_t *)(a + i); 1070f49b12c6SRichard Henderson uint32_t bi = *(uint32_t *)(b + i); 1071f49b12c6SRichard Henderson uint32_t di = ai + bi; 1072f49b12c6SRichard Henderson if (di < ai) { 1073f49b12c6SRichard Henderson di = UINT32_MAX; 1074f49b12c6SRichard Henderson } 1075f49b12c6SRichard Henderson *(uint32_t *)(d + i) = di; 1076f49b12c6SRichard Henderson } 1077f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1078f49b12c6SRichard Henderson } 1079f49b12c6SRichard Henderson 1080f49b12c6SRichard Henderson void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc) 1081f49b12c6SRichard Henderson { 1082f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1083f49b12c6SRichard Henderson intptr_t i; 1084f49b12c6SRichard Henderson 1085f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1086f49b12c6SRichard Henderson uint64_t ai = *(uint64_t *)(a + i); 1087f49b12c6SRichard Henderson uint64_t bi = *(uint64_t *)(b + i); 1088f49b12c6SRichard Henderson uint64_t di = ai + bi; 1089f49b12c6SRichard Henderson if (di < ai) { 1090f49b12c6SRichard Henderson di = UINT64_MAX; 1091f49b12c6SRichard Henderson } 1092f49b12c6SRichard Henderson *(uint64_t *)(d + i) = di; 1093f49b12c6SRichard Henderson } 1094f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1095f49b12c6SRichard Henderson } 1096f49b12c6SRichard Henderson 1097f49b12c6SRichard Henderson void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc) 1098f49b12c6SRichard Henderson { 1099f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1100f49b12c6SRichard Henderson intptr_t i; 1101f49b12c6SRichard Henderson 1102f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1103f49b12c6SRichard Henderson int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 1104f49b12c6SRichard Henderson if (r < 0) { 1105f49b12c6SRichard Henderson r = 0; 1106f49b12c6SRichard Henderson } 1107f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 1108f49b12c6SRichard Henderson } 1109f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1110f49b12c6SRichard Henderson } 1111f49b12c6SRichard Henderson 1112f49b12c6SRichard Henderson void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc) 1113f49b12c6SRichard Henderson { 1114f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1115f49b12c6SRichard Henderson intptr_t i; 1116f49b12c6SRichard Henderson 1117f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1118f49b12c6SRichard Henderson int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 1119f49b12c6SRichard Henderson if (r < 0) { 1120f49b12c6SRichard Henderson r = 0; 1121f49b12c6SRichard Henderson } 1122f49b12c6SRichard Henderson *(uint16_t *)(d + i) = r; 1123f49b12c6SRichard Henderson } 1124f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1125f49b12c6SRichard Henderson } 1126f49b12c6SRichard Henderson 1127f49b12c6SRichard Henderson void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc) 1128f49b12c6SRichard Henderson { 1129f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1130f49b12c6SRichard Henderson intptr_t i; 1131f49b12c6SRichard Henderson 1132f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1133f49b12c6SRichard Henderson uint32_t ai = *(uint32_t *)(a + i); 1134f49b12c6SRichard Henderson uint32_t bi = *(uint32_t *)(b + i); 1135f49b12c6SRichard Henderson uint32_t di = ai - bi; 1136f49b12c6SRichard Henderson if (ai < bi) { 1137f49b12c6SRichard Henderson di = 0; 1138f49b12c6SRichard Henderson } 1139f49b12c6SRichard Henderson *(uint32_t *)(d + i) = di; 1140f49b12c6SRichard Henderson } 1141f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1142f49b12c6SRichard Henderson } 1143f49b12c6SRichard Henderson 1144f49b12c6SRichard Henderson void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc) 1145f49b12c6SRichard Henderson { 1146f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1147f49b12c6SRichard Henderson intptr_t i; 1148f49b12c6SRichard Henderson 1149f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1150f49b12c6SRichard Henderson uint64_t ai = *(uint64_t *)(a + i); 1151f49b12c6SRichard Henderson uint64_t bi = *(uint64_t *)(b + i); 1152f49b12c6SRichard Henderson uint64_t di = ai - bi; 1153f49b12c6SRichard Henderson if (ai < bi) { 1154f49b12c6SRichard Henderson di = 0; 1155f49b12c6SRichard Henderson } 1156f49b12c6SRichard Henderson *(uint64_t *)(d + i) = di; 1157f49b12c6SRichard Henderson } 1158f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1159f49b12c6SRichard Henderson } 1160dd0a0fcdSRichard Henderson 1161dd0a0fcdSRichard Henderson void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc) 1162dd0a0fcdSRichard Henderson { 1163dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1164dd0a0fcdSRichard Henderson intptr_t i; 1165dd0a0fcdSRichard Henderson 1166dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1167dd0a0fcdSRichard Henderson int8_t aa = *(int8_t *)(a + i); 1168dd0a0fcdSRichard Henderson int8_t bb = *(int8_t *)(b + i); 1169dd0a0fcdSRichard Henderson int8_t dd = aa < bb ? aa : bb; 1170dd0a0fcdSRichard Henderson *(int8_t *)(d + i) = dd; 1171dd0a0fcdSRichard Henderson } 1172dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1173dd0a0fcdSRichard Henderson } 1174dd0a0fcdSRichard Henderson 1175dd0a0fcdSRichard Henderson void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc) 1176dd0a0fcdSRichard Henderson { 1177dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1178dd0a0fcdSRichard Henderson intptr_t i; 1179dd0a0fcdSRichard Henderson 1180dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1181dd0a0fcdSRichard Henderson int16_t aa = *(int16_t *)(a + i); 1182dd0a0fcdSRichard Henderson int16_t bb = *(int16_t *)(b + i); 1183dd0a0fcdSRichard Henderson int16_t dd = aa < bb ? aa : bb; 1184dd0a0fcdSRichard Henderson *(int16_t *)(d + i) = dd; 1185dd0a0fcdSRichard Henderson } 1186dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1187dd0a0fcdSRichard Henderson } 1188dd0a0fcdSRichard Henderson 1189dd0a0fcdSRichard Henderson void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc) 1190dd0a0fcdSRichard Henderson { 1191dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1192dd0a0fcdSRichard Henderson intptr_t i; 1193dd0a0fcdSRichard Henderson 1194dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1195dd0a0fcdSRichard Henderson int32_t aa = *(int32_t *)(a + i); 1196dd0a0fcdSRichard Henderson int32_t bb = *(int32_t *)(b + i); 1197dd0a0fcdSRichard Henderson int32_t dd = aa < bb ? aa : bb; 1198dd0a0fcdSRichard Henderson *(int32_t *)(d + i) = dd; 1199dd0a0fcdSRichard Henderson } 1200dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1201dd0a0fcdSRichard Henderson } 1202dd0a0fcdSRichard Henderson 1203dd0a0fcdSRichard Henderson void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc) 1204dd0a0fcdSRichard Henderson { 1205dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1206dd0a0fcdSRichard Henderson intptr_t i; 1207dd0a0fcdSRichard Henderson 1208dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1209dd0a0fcdSRichard Henderson int64_t aa = *(int64_t *)(a + i); 1210dd0a0fcdSRichard Henderson int64_t bb = *(int64_t *)(b + i); 1211dd0a0fcdSRichard Henderson int64_t dd = aa < bb ? aa : bb; 1212dd0a0fcdSRichard Henderson *(int64_t *)(d + i) = dd; 1213dd0a0fcdSRichard Henderson } 1214dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1215dd0a0fcdSRichard Henderson } 1216dd0a0fcdSRichard Henderson 1217dd0a0fcdSRichard Henderson void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc) 1218dd0a0fcdSRichard Henderson { 1219dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1220dd0a0fcdSRichard Henderson intptr_t i; 1221dd0a0fcdSRichard Henderson 1222dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1223dd0a0fcdSRichard Henderson int8_t aa = *(int8_t *)(a + i); 1224dd0a0fcdSRichard Henderson int8_t bb = *(int8_t *)(b + i); 1225dd0a0fcdSRichard Henderson int8_t dd = aa > bb ? aa : bb; 1226dd0a0fcdSRichard Henderson *(int8_t *)(d + i) = dd; 1227dd0a0fcdSRichard Henderson } 1228dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1229dd0a0fcdSRichard Henderson } 1230dd0a0fcdSRichard Henderson 1231dd0a0fcdSRichard Henderson void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc) 1232dd0a0fcdSRichard Henderson { 1233dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1234dd0a0fcdSRichard Henderson intptr_t i; 1235dd0a0fcdSRichard Henderson 1236dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1237dd0a0fcdSRichard Henderson int16_t aa = *(int16_t *)(a + i); 1238dd0a0fcdSRichard Henderson int16_t bb = *(int16_t *)(b + i); 1239dd0a0fcdSRichard Henderson int16_t dd = aa > bb ? aa : bb; 1240dd0a0fcdSRichard Henderson *(int16_t *)(d + i) = dd; 1241dd0a0fcdSRichard Henderson } 1242dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1243dd0a0fcdSRichard Henderson } 1244dd0a0fcdSRichard Henderson 1245dd0a0fcdSRichard Henderson void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc) 1246dd0a0fcdSRichard Henderson { 1247dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1248dd0a0fcdSRichard Henderson intptr_t i; 1249dd0a0fcdSRichard Henderson 1250dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1251dd0a0fcdSRichard Henderson int32_t aa = *(int32_t *)(a + i); 1252dd0a0fcdSRichard Henderson int32_t bb = *(int32_t *)(b + i); 1253dd0a0fcdSRichard Henderson int32_t dd = aa > bb ? aa : bb; 1254dd0a0fcdSRichard Henderson *(int32_t *)(d + i) = dd; 1255dd0a0fcdSRichard Henderson } 1256dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1257dd0a0fcdSRichard Henderson } 1258dd0a0fcdSRichard Henderson 1259dd0a0fcdSRichard Henderson void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc) 1260dd0a0fcdSRichard Henderson { 1261dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1262dd0a0fcdSRichard Henderson intptr_t i; 1263dd0a0fcdSRichard Henderson 1264dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1265dd0a0fcdSRichard Henderson int64_t aa = *(int64_t *)(a + i); 1266dd0a0fcdSRichard Henderson int64_t bb = *(int64_t *)(b + i); 1267dd0a0fcdSRichard Henderson int64_t dd = aa > bb ? aa : bb; 1268dd0a0fcdSRichard Henderson *(int64_t *)(d + i) = dd; 1269dd0a0fcdSRichard Henderson } 1270dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1271dd0a0fcdSRichard Henderson } 1272dd0a0fcdSRichard Henderson 1273dd0a0fcdSRichard Henderson void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc) 1274dd0a0fcdSRichard Henderson { 1275dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1276dd0a0fcdSRichard Henderson intptr_t i; 1277dd0a0fcdSRichard Henderson 1278dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1279dd0a0fcdSRichard Henderson uint8_t aa = *(uint8_t *)(a + i); 1280dd0a0fcdSRichard Henderson uint8_t bb = *(uint8_t *)(b + i); 1281dd0a0fcdSRichard Henderson uint8_t dd = aa < bb ? aa : bb; 1282dd0a0fcdSRichard Henderson *(uint8_t *)(d + i) = dd; 1283dd0a0fcdSRichard Henderson } 1284dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1285dd0a0fcdSRichard Henderson } 1286dd0a0fcdSRichard Henderson 1287dd0a0fcdSRichard Henderson void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc) 1288dd0a0fcdSRichard Henderson { 1289dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1290dd0a0fcdSRichard Henderson intptr_t i; 1291dd0a0fcdSRichard Henderson 1292dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1293dd0a0fcdSRichard Henderson uint16_t aa = *(uint16_t *)(a + i); 1294dd0a0fcdSRichard Henderson uint16_t bb = *(uint16_t *)(b + i); 1295dd0a0fcdSRichard Henderson uint16_t dd = aa < bb ? aa : bb; 1296dd0a0fcdSRichard Henderson *(uint16_t *)(d + i) = dd; 1297dd0a0fcdSRichard Henderson } 1298dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1299dd0a0fcdSRichard Henderson } 1300dd0a0fcdSRichard Henderson 1301dd0a0fcdSRichard Henderson void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc) 1302dd0a0fcdSRichard Henderson { 1303dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1304dd0a0fcdSRichard Henderson intptr_t i; 1305dd0a0fcdSRichard Henderson 1306dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1307dd0a0fcdSRichard Henderson uint32_t aa = *(uint32_t *)(a + i); 1308dd0a0fcdSRichard Henderson uint32_t bb = *(uint32_t *)(b + i); 1309dd0a0fcdSRichard Henderson uint32_t dd = aa < bb ? aa : bb; 1310dd0a0fcdSRichard Henderson *(uint32_t *)(d + i) = dd; 1311dd0a0fcdSRichard Henderson } 1312dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1313dd0a0fcdSRichard Henderson } 1314dd0a0fcdSRichard Henderson 1315dd0a0fcdSRichard Henderson void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc) 1316dd0a0fcdSRichard Henderson { 1317dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1318dd0a0fcdSRichard Henderson intptr_t i; 1319dd0a0fcdSRichard Henderson 1320dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1321dd0a0fcdSRichard Henderson uint64_t aa = *(uint64_t *)(a + i); 1322dd0a0fcdSRichard Henderson uint64_t bb = *(uint64_t *)(b + i); 1323dd0a0fcdSRichard Henderson uint64_t dd = aa < bb ? aa : bb; 1324dd0a0fcdSRichard Henderson *(uint64_t *)(d + i) = dd; 1325dd0a0fcdSRichard Henderson } 1326dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1327dd0a0fcdSRichard Henderson } 1328dd0a0fcdSRichard Henderson 1329dd0a0fcdSRichard Henderson void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc) 1330dd0a0fcdSRichard Henderson { 1331dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1332dd0a0fcdSRichard Henderson intptr_t i; 1333dd0a0fcdSRichard Henderson 1334dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1335dd0a0fcdSRichard Henderson uint8_t aa = *(uint8_t *)(a + i); 1336dd0a0fcdSRichard Henderson uint8_t bb = *(uint8_t *)(b + i); 1337dd0a0fcdSRichard Henderson uint8_t dd = aa > bb ? aa : bb; 1338dd0a0fcdSRichard Henderson *(uint8_t *)(d + i) = dd; 1339dd0a0fcdSRichard Henderson } 1340dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1341dd0a0fcdSRichard Henderson } 1342dd0a0fcdSRichard Henderson 1343dd0a0fcdSRichard Henderson void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc) 1344dd0a0fcdSRichard Henderson { 1345dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1346dd0a0fcdSRichard Henderson intptr_t i; 1347dd0a0fcdSRichard Henderson 1348dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1349dd0a0fcdSRichard Henderson uint16_t aa = *(uint16_t *)(a + i); 1350dd0a0fcdSRichard Henderson uint16_t bb = *(uint16_t *)(b + i); 1351dd0a0fcdSRichard Henderson uint16_t dd = aa > bb ? aa : bb; 1352dd0a0fcdSRichard Henderson *(uint16_t *)(d + i) = dd; 1353dd0a0fcdSRichard Henderson } 1354dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1355dd0a0fcdSRichard Henderson } 1356dd0a0fcdSRichard Henderson 1357dd0a0fcdSRichard Henderson void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc) 1358dd0a0fcdSRichard Henderson { 1359dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1360dd0a0fcdSRichard Henderson intptr_t i; 1361dd0a0fcdSRichard Henderson 1362dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1363dd0a0fcdSRichard Henderson uint32_t aa = *(uint32_t *)(a + i); 1364dd0a0fcdSRichard Henderson uint32_t bb = *(uint32_t *)(b + i); 1365dd0a0fcdSRichard Henderson uint32_t dd = aa > bb ? aa : bb; 1366dd0a0fcdSRichard Henderson *(uint32_t *)(d + i) = dd; 1367dd0a0fcdSRichard Henderson } 1368dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1369dd0a0fcdSRichard Henderson } 1370dd0a0fcdSRichard Henderson 1371dd0a0fcdSRichard Henderson void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc) 1372dd0a0fcdSRichard Henderson { 1373dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1374dd0a0fcdSRichard Henderson intptr_t i; 1375dd0a0fcdSRichard Henderson 1376dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1377dd0a0fcdSRichard Henderson uint64_t aa = *(uint64_t *)(a + i); 1378dd0a0fcdSRichard Henderson uint64_t bb = *(uint64_t *)(b + i); 1379dd0a0fcdSRichard Henderson uint64_t dd = aa > bb ? aa : bb; 1380dd0a0fcdSRichard Henderson *(uint64_t *)(d + i) = dd; 1381dd0a0fcdSRichard Henderson } 1382dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1383dd0a0fcdSRichard Henderson } 138438dc1294SRichard Henderson 138538dc1294SRichard Henderson void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc) 138638dc1294SRichard Henderson { 138738dc1294SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 138838dc1294SRichard Henderson intptr_t i; 138938dc1294SRichard Henderson 13906c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 13916c7ab301SRichard Henderson uint64_t aa = *(uint64_t *)(a + i); 13926c7ab301SRichard Henderson uint64_t bb = *(uint64_t *)(b + i); 13936c7ab301SRichard Henderson uint64_t cc = *(uint64_t *)(c + i); 13946c7ab301SRichard Henderson *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa); 139538dc1294SRichard Henderson } 139638dc1294SRichard Henderson clear_high(d, oprsz, desc); 139738dc1294SRichard Henderson } 1398