1db432672SRichard Henderson /* 2db432672SRichard Henderson * Generic vectorized operation runtime 3db432672SRichard Henderson * 4db432672SRichard Henderson * Copyright (c) 2018 Linaro 5db432672SRichard Henderson * 6db432672SRichard Henderson * This library is free software; you can redistribute it and/or 7db432672SRichard Henderson * modify it under the terms of the GNU Lesser General Public 8db432672SRichard Henderson * License as published by the Free Software Foundation; either 9fb0343d5SThomas Huth * version 2.1 of the License, or (at your option) any later version. 10db432672SRichard Henderson * 11db432672SRichard Henderson * This library is distributed in the hope that it will be useful, 12db432672SRichard Henderson * but WITHOUT ANY WARRANTY; without even the implied warranty of 13db432672SRichard Henderson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14db432672SRichard Henderson * Lesser General Public License for more details. 15db432672SRichard Henderson * 16db432672SRichard Henderson * You should have received a copy of the GNU Lesser General Public 17db432672SRichard Henderson * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18db432672SRichard Henderson */ 19db432672SRichard Henderson 20db432672SRichard Henderson #include "qemu/osdep.h" 21db432672SRichard Henderson #include "qemu/host-utils.h" 22db432672SRichard Henderson #include "cpu.h" 23db432672SRichard Henderson #include "exec/helper-proto.h" 24dcb32f1dSPhilippe Mathieu-Daudé #include "tcg/tcg-gvec-desc.h" 25db432672SRichard Henderson 26db432672SRichard Henderson 27db432672SRichard Henderson #define DUP16(X) X 28db432672SRichard Henderson #define DUP8(X) X 29db432672SRichard Henderson #define DUP4(X) X 30db432672SRichard Henderson #define DUP2(X) X 31db432672SRichard Henderson 32db432672SRichard Henderson static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc) 33db432672SRichard Henderson { 34db432672SRichard Henderson intptr_t maxsz = simd_maxsz(desc); 35db432672SRichard Henderson intptr_t i; 36db432672SRichard Henderson 37db432672SRichard Henderson if (unlikely(maxsz > oprsz)) { 38db432672SRichard Henderson for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) { 39db432672SRichard Henderson *(uint64_t *)(d + i) = 0; 40db432672SRichard Henderson } 41db432672SRichard Henderson } 42db432672SRichard Henderson } 43db432672SRichard Henderson 44db432672SRichard Henderson void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc) 45db432672SRichard Henderson { 46db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 47db432672SRichard Henderson intptr_t i; 48db432672SRichard Henderson 49*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 50*6c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 51db432672SRichard Henderson } 52db432672SRichard Henderson clear_high(d, oprsz, desc); 53db432672SRichard Henderson } 54db432672SRichard Henderson 55db432672SRichard Henderson void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc) 56db432672SRichard Henderson { 57db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 58db432672SRichard Henderson intptr_t i; 59db432672SRichard Henderson 60*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 61*6c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 62db432672SRichard Henderson } 63db432672SRichard Henderson clear_high(d, oprsz, desc); 64db432672SRichard Henderson } 65db432672SRichard Henderson 66db432672SRichard Henderson void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc) 67db432672SRichard Henderson { 68db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 69db432672SRichard Henderson intptr_t i; 70db432672SRichard Henderson 71*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 72*6c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i); 73db432672SRichard Henderson } 74db432672SRichard Henderson clear_high(d, oprsz, desc); 75db432672SRichard Henderson } 76db432672SRichard Henderson 77db432672SRichard Henderson void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc) 78db432672SRichard Henderson { 79db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 80db432672SRichard Henderson intptr_t i; 81db432672SRichard Henderson 82*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 83*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i); 84db432672SRichard Henderson } 85db432672SRichard Henderson clear_high(d, oprsz, desc); 86db432672SRichard Henderson } 87db432672SRichard Henderson 8822fc3527SRichard Henderson void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc) 8922fc3527SRichard Henderson { 9022fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 91*6c7ab301SRichard Henderson uint8_t vecb = (uint8_t)DUP16(b); 9222fc3527SRichard Henderson intptr_t i; 9322fc3527SRichard Henderson 94*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 95*6c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + vecb; 9622fc3527SRichard Henderson } 9722fc3527SRichard Henderson clear_high(d, oprsz, desc); 9822fc3527SRichard Henderson } 9922fc3527SRichard Henderson 10022fc3527SRichard Henderson void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc) 10122fc3527SRichard Henderson { 10222fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 103*6c7ab301SRichard Henderson uint16_t vecb = (uint16_t)DUP8(b); 10422fc3527SRichard Henderson intptr_t i; 10522fc3527SRichard Henderson 106*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 107*6c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + vecb; 10822fc3527SRichard Henderson } 10922fc3527SRichard Henderson clear_high(d, oprsz, desc); 11022fc3527SRichard Henderson } 11122fc3527SRichard Henderson 11222fc3527SRichard Henderson void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc) 11322fc3527SRichard Henderson { 11422fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 115*6c7ab301SRichard Henderson uint32_t vecb = (uint32_t)DUP4(b); 11622fc3527SRichard Henderson intptr_t i; 11722fc3527SRichard Henderson 118*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 119*6c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + vecb; 12022fc3527SRichard Henderson } 12122fc3527SRichard Henderson clear_high(d, oprsz, desc); 12222fc3527SRichard Henderson } 12322fc3527SRichard Henderson 12422fc3527SRichard Henderson void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc) 12522fc3527SRichard Henderson { 12622fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 127*6c7ab301SRichard Henderson uint64_t vecb = (uint64_t)DUP2(b); 12822fc3527SRichard Henderson intptr_t i; 12922fc3527SRichard Henderson 130*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 131*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + vecb; 13222fc3527SRichard Henderson } 13322fc3527SRichard Henderson clear_high(d, oprsz, desc); 13422fc3527SRichard Henderson } 13522fc3527SRichard Henderson 136db432672SRichard Henderson void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc) 137db432672SRichard Henderson { 138db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 139db432672SRichard Henderson intptr_t i; 140db432672SRichard Henderson 141*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 142*6c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 143db432672SRichard Henderson } 144db432672SRichard Henderson clear_high(d, oprsz, desc); 145db432672SRichard Henderson } 146db432672SRichard Henderson 147db432672SRichard Henderson void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc) 148db432672SRichard Henderson { 149db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 150db432672SRichard Henderson intptr_t i; 151db432672SRichard Henderson 152*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 153*6c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 154db432672SRichard Henderson } 155db432672SRichard Henderson clear_high(d, oprsz, desc); 156db432672SRichard Henderson } 157db432672SRichard Henderson 158db432672SRichard Henderson void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc) 159db432672SRichard Henderson { 160db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 161db432672SRichard Henderson intptr_t i; 162db432672SRichard Henderson 163*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 164*6c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i); 165db432672SRichard Henderson } 166db432672SRichard Henderson clear_high(d, oprsz, desc); 167db432672SRichard Henderson } 168db432672SRichard Henderson 169db432672SRichard Henderson void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc) 170db432672SRichard Henderson { 171db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 172db432672SRichard Henderson intptr_t i; 173db432672SRichard Henderson 174*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 175*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i); 176db432672SRichard Henderson } 177db432672SRichard Henderson clear_high(d, oprsz, desc); 178db432672SRichard Henderson } 179db432672SRichard Henderson 18022fc3527SRichard Henderson void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc) 18122fc3527SRichard Henderson { 18222fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 183*6c7ab301SRichard Henderson uint8_t vecb = (uint8_t)DUP16(b); 18422fc3527SRichard Henderson intptr_t i; 18522fc3527SRichard Henderson 186*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 187*6c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - vecb; 18822fc3527SRichard Henderson } 18922fc3527SRichard Henderson clear_high(d, oprsz, desc); 19022fc3527SRichard Henderson } 19122fc3527SRichard Henderson 19222fc3527SRichard Henderson void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc) 19322fc3527SRichard Henderson { 19422fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 195*6c7ab301SRichard Henderson uint16_t vecb = (uint16_t)DUP8(b); 19622fc3527SRichard Henderson intptr_t i; 19722fc3527SRichard Henderson 198*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 199*6c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - vecb; 20022fc3527SRichard Henderson } 20122fc3527SRichard Henderson clear_high(d, oprsz, desc); 20222fc3527SRichard Henderson } 20322fc3527SRichard Henderson 20422fc3527SRichard Henderson void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc) 20522fc3527SRichard Henderson { 20622fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 207*6c7ab301SRichard Henderson uint32_t vecb = (uint32_t)DUP4(b); 20822fc3527SRichard Henderson intptr_t i; 20922fc3527SRichard Henderson 210*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 211*6c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - vecb; 21222fc3527SRichard Henderson } 21322fc3527SRichard Henderson clear_high(d, oprsz, desc); 21422fc3527SRichard Henderson } 21522fc3527SRichard Henderson 21622fc3527SRichard Henderson void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc) 21722fc3527SRichard Henderson { 21822fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 219*6c7ab301SRichard Henderson uint64_t vecb = (uint64_t)DUP2(b); 22022fc3527SRichard Henderson intptr_t i; 22122fc3527SRichard Henderson 222*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 223*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - vecb; 22422fc3527SRichard Henderson } 22522fc3527SRichard Henderson clear_high(d, oprsz, desc); 22622fc3527SRichard Henderson } 22722fc3527SRichard Henderson 2283774030aSRichard Henderson void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc) 2293774030aSRichard Henderson { 2303774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2313774030aSRichard Henderson intptr_t i; 2323774030aSRichard Henderson 233*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 234*6c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i); 2353774030aSRichard Henderson } 2363774030aSRichard Henderson clear_high(d, oprsz, desc); 2373774030aSRichard Henderson } 2383774030aSRichard Henderson 2393774030aSRichard Henderson void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc) 2403774030aSRichard Henderson { 2413774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2423774030aSRichard Henderson intptr_t i; 2433774030aSRichard Henderson 244*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 245*6c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i); 2463774030aSRichard Henderson } 2473774030aSRichard Henderson clear_high(d, oprsz, desc); 2483774030aSRichard Henderson } 2493774030aSRichard Henderson 2503774030aSRichard Henderson void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc) 2513774030aSRichard Henderson { 2523774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2533774030aSRichard Henderson intptr_t i; 2543774030aSRichard Henderson 255*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 256*6c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i); 2573774030aSRichard Henderson } 2583774030aSRichard Henderson clear_high(d, oprsz, desc); 2593774030aSRichard Henderson } 2603774030aSRichard Henderson 2613774030aSRichard Henderson void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc) 2623774030aSRichard Henderson { 2633774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 2643774030aSRichard Henderson intptr_t i; 2653774030aSRichard Henderson 266*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 267*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i); 2683774030aSRichard Henderson } 2693774030aSRichard Henderson clear_high(d, oprsz, desc); 2703774030aSRichard Henderson } 2713774030aSRichard Henderson 27222fc3527SRichard Henderson void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc) 27322fc3527SRichard Henderson { 27422fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 275*6c7ab301SRichard Henderson uint8_t vecb = (uint8_t)DUP16(b); 27622fc3527SRichard Henderson intptr_t i; 27722fc3527SRichard Henderson 278*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 279*6c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * vecb; 28022fc3527SRichard Henderson } 28122fc3527SRichard Henderson clear_high(d, oprsz, desc); 28222fc3527SRichard Henderson } 28322fc3527SRichard Henderson 28422fc3527SRichard Henderson void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc) 28522fc3527SRichard Henderson { 28622fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 287*6c7ab301SRichard Henderson uint16_t vecb = (uint16_t)DUP8(b); 28822fc3527SRichard Henderson intptr_t i; 28922fc3527SRichard Henderson 290*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 291*6c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * vecb; 29222fc3527SRichard Henderson } 29322fc3527SRichard Henderson clear_high(d, oprsz, desc); 29422fc3527SRichard Henderson } 29522fc3527SRichard Henderson 29622fc3527SRichard Henderson void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc) 29722fc3527SRichard Henderson { 29822fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 299*6c7ab301SRichard Henderson uint32_t vecb = (uint32_t)DUP4(b); 30022fc3527SRichard Henderson intptr_t i; 30122fc3527SRichard Henderson 302*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 303*6c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * vecb; 30422fc3527SRichard Henderson } 30522fc3527SRichard Henderson clear_high(d, oprsz, desc); 30622fc3527SRichard Henderson } 30722fc3527SRichard Henderson 30822fc3527SRichard Henderson void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc) 30922fc3527SRichard Henderson { 31022fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 311*6c7ab301SRichard Henderson uint64_t vecb = (uint64_t)DUP2(b); 31222fc3527SRichard Henderson intptr_t i; 31322fc3527SRichard Henderson 314*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 315*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * vecb; 31622fc3527SRichard Henderson } 31722fc3527SRichard Henderson clear_high(d, oprsz, desc); 31822fc3527SRichard Henderson } 31922fc3527SRichard Henderson 320db432672SRichard Henderson void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) 321db432672SRichard Henderson { 322db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 323db432672SRichard Henderson intptr_t i; 324db432672SRichard Henderson 325*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 326*6c7ab301SRichard Henderson *(uint8_t *)(d + i) = -*(uint8_t *)(a + i); 327db432672SRichard Henderson } 328db432672SRichard Henderson clear_high(d, oprsz, desc); 329db432672SRichard Henderson } 330db432672SRichard Henderson 331db432672SRichard Henderson void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) 332db432672SRichard Henderson { 333db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 334db432672SRichard Henderson intptr_t i; 335db432672SRichard Henderson 336*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 337*6c7ab301SRichard Henderson *(uint16_t *)(d + i) = -*(uint16_t *)(a + i); 338db432672SRichard Henderson } 339db432672SRichard Henderson clear_high(d, oprsz, desc); 340db432672SRichard Henderson } 341db432672SRichard Henderson 342db432672SRichard Henderson void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) 343db432672SRichard Henderson { 344db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 345db432672SRichard Henderson intptr_t i; 346db432672SRichard Henderson 347*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 348*6c7ab301SRichard Henderson *(uint32_t *)(d + i) = -*(uint32_t *)(a + i); 349db432672SRichard Henderson } 350db432672SRichard Henderson clear_high(d, oprsz, desc); 351db432672SRichard Henderson } 352db432672SRichard Henderson 353db432672SRichard Henderson void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) 354db432672SRichard Henderson { 355db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 356db432672SRichard Henderson intptr_t i; 357db432672SRichard Henderson 358*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 359*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = -*(uint64_t *)(a + i); 360db432672SRichard Henderson } 361db432672SRichard Henderson clear_high(d, oprsz, desc); 362db432672SRichard Henderson } 363db432672SRichard Henderson 364bcefc902SRichard Henderson void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc) 365bcefc902SRichard Henderson { 366bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 367bcefc902SRichard Henderson intptr_t i; 368bcefc902SRichard Henderson 369bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 370bcefc902SRichard Henderson int8_t aa = *(int8_t *)(a + i); 371bcefc902SRichard Henderson *(int8_t *)(d + i) = aa < 0 ? -aa : aa; 372bcefc902SRichard Henderson } 373bcefc902SRichard Henderson clear_high(d, oprsz, desc); 374bcefc902SRichard Henderson } 375bcefc902SRichard Henderson 376bcefc902SRichard Henderson void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc) 377bcefc902SRichard Henderson { 378bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 379bcefc902SRichard Henderson intptr_t i; 380bcefc902SRichard Henderson 381bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 382bcefc902SRichard Henderson int16_t aa = *(int16_t *)(a + i); 383bcefc902SRichard Henderson *(int16_t *)(d + i) = aa < 0 ? -aa : aa; 384bcefc902SRichard Henderson } 385bcefc902SRichard Henderson clear_high(d, oprsz, desc); 386bcefc902SRichard Henderson } 387bcefc902SRichard Henderson 388bcefc902SRichard Henderson void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc) 389bcefc902SRichard Henderson { 390bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 391bcefc902SRichard Henderson intptr_t i; 392bcefc902SRichard Henderson 393bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 394bcefc902SRichard Henderson int32_t aa = *(int32_t *)(a + i); 395bcefc902SRichard Henderson *(int32_t *)(d + i) = aa < 0 ? -aa : aa; 396bcefc902SRichard Henderson } 397bcefc902SRichard Henderson clear_high(d, oprsz, desc); 398bcefc902SRichard Henderson } 399bcefc902SRichard Henderson 400bcefc902SRichard Henderson void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc) 401bcefc902SRichard Henderson { 402bcefc902SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 403bcefc902SRichard Henderson intptr_t i; 404bcefc902SRichard Henderson 405bcefc902SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 406bcefc902SRichard Henderson int64_t aa = *(int64_t *)(a + i); 407bcefc902SRichard Henderson *(int64_t *)(d + i) = aa < 0 ? -aa : aa; 408bcefc902SRichard Henderson } 409bcefc902SRichard Henderson clear_high(d, oprsz, desc); 410bcefc902SRichard Henderson } 411bcefc902SRichard Henderson 412db432672SRichard Henderson void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) 413db432672SRichard Henderson { 414db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 415db432672SRichard Henderson 416db432672SRichard Henderson memcpy(d, a, oprsz); 417db432672SRichard Henderson clear_high(d, oprsz, desc); 418db432672SRichard Henderson } 419db432672SRichard Henderson 420db432672SRichard Henderson void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) 421db432672SRichard Henderson { 422db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 423db432672SRichard Henderson intptr_t i; 424db432672SRichard Henderson 425db432672SRichard Henderson if (c == 0) { 426db432672SRichard Henderson oprsz = 0; 427db432672SRichard Henderson } else { 428db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 429db432672SRichard Henderson *(uint64_t *)(d + i) = c; 430db432672SRichard Henderson } 431db432672SRichard Henderson } 432db432672SRichard Henderson clear_high(d, oprsz, desc); 433db432672SRichard Henderson } 434db432672SRichard Henderson 435db432672SRichard Henderson void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) 436db432672SRichard Henderson { 437db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 438db432672SRichard Henderson intptr_t i; 439db432672SRichard Henderson 440db432672SRichard Henderson if (c == 0) { 441db432672SRichard Henderson oprsz = 0; 442db432672SRichard Henderson } else { 443db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 444db432672SRichard Henderson *(uint32_t *)(d + i) = c; 445db432672SRichard Henderson } 446db432672SRichard Henderson } 447db432672SRichard Henderson clear_high(d, oprsz, desc); 448db432672SRichard Henderson } 449db432672SRichard Henderson 450db432672SRichard Henderson void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) 451db432672SRichard Henderson { 452db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); 453db432672SRichard Henderson } 454db432672SRichard Henderson 455db432672SRichard Henderson void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) 456db432672SRichard Henderson { 457db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); 458db432672SRichard Henderson } 459db432672SRichard Henderson 460db432672SRichard Henderson void HELPER(gvec_not)(void *d, void *a, uint32_t desc) 461db432672SRichard Henderson { 462db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 463db432672SRichard Henderson intptr_t i; 464db432672SRichard Henderson 465*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 466*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i); 467db432672SRichard Henderson } 468db432672SRichard Henderson clear_high(d, oprsz, desc); 469db432672SRichard Henderson } 470db432672SRichard Henderson 471db432672SRichard Henderson void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) 472db432672SRichard Henderson { 473db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 474db432672SRichard Henderson intptr_t i; 475db432672SRichard Henderson 476*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 477*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i); 478db432672SRichard Henderson } 479db432672SRichard Henderson clear_high(d, oprsz, desc); 480db432672SRichard Henderson } 481db432672SRichard Henderson 482db432672SRichard Henderson void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) 483db432672SRichard Henderson { 484db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 485db432672SRichard Henderson intptr_t i; 486db432672SRichard Henderson 487*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 488*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i); 489db432672SRichard Henderson } 490db432672SRichard Henderson clear_high(d, oprsz, desc); 491db432672SRichard Henderson } 492db432672SRichard Henderson 493db432672SRichard Henderson void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) 494db432672SRichard Henderson { 495db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 496db432672SRichard Henderson intptr_t i; 497db432672SRichard Henderson 498*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 499*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i); 500db432672SRichard Henderson } 501db432672SRichard Henderson clear_high(d, oprsz, desc); 502db432672SRichard Henderson } 503db432672SRichard Henderson 504db432672SRichard Henderson void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) 505db432672SRichard Henderson { 506db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 507db432672SRichard Henderson intptr_t i; 508db432672SRichard Henderson 509*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 510*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i); 511db432672SRichard Henderson } 512db432672SRichard Henderson clear_high(d, oprsz, desc); 513db432672SRichard Henderson } 514db432672SRichard Henderson 515db432672SRichard Henderson void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) 516db432672SRichard Henderson { 517db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 518db432672SRichard Henderson intptr_t i; 519db432672SRichard Henderson 520*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 521*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i); 522db432672SRichard Henderson } 523db432672SRichard Henderson clear_high(d, oprsz, desc); 524db432672SRichard Henderson } 525d0ec9796SRichard Henderson 526f550805dSRichard Henderson void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc) 527f550805dSRichard Henderson { 528f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 529f550805dSRichard Henderson intptr_t i; 530f550805dSRichard Henderson 531*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 532*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i)); 533f550805dSRichard Henderson } 534f550805dSRichard Henderson clear_high(d, oprsz, desc); 535f550805dSRichard Henderson } 536f550805dSRichard Henderson 537f550805dSRichard Henderson void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc) 538f550805dSRichard Henderson { 539f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 540f550805dSRichard Henderson intptr_t i; 541f550805dSRichard Henderson 542*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 543*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i)); 544f550805dSRichard Henderson } 545f550805dSRichard Henderson clear_high(d, oprsz, desc); 546f550805dSRichard Henderson } 547f550805dSRichard Henderson 548f550805dSRichard Henderson void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc) 549f550805dSRichard Henderson { 550f550805dSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 551f550805dSRichard Henderson intptr_t i; 552f550805dSRichard Henderson 553*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 554*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i)); 555f550805dSRichard Henderson } 556f550805dSRichard Henderson clear_high(d, oprsz, desc); 557f550805dSRichard Henderson } 558f550805dSRichard Henderson 55922fc3527SRichard Henderson void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc) 56022fc3527SRichard Henderson { 56122fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 562*6c7ab301SRichard Henderson uint64_t vecb = (uint64_t)DUP2(b); 56322fc3527SRichard Henderson intptr_t i; 56422fc3527SRichard Henderson 565*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 566*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & vecb; 56722fc3527SRichard Henderson } 56822fc3527SRichard Henderson clear_high(d, oprsz, desc); 56922fc3527SRichard Henderson } 57022fc3527SRichard Henderson 57122fc3527SRichard Henderson void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc) 57222fc3527SRichard Henderson { 57322fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 574*6c7ab301SRichard Henderson uint64_t vecb = (uint64_t)DUP2(b); 57522fc3527SRichard Henderson intptr_t i; 57622fc3527SRichard Henderson 577*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 578*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ vecb; 57922fc3527SRichard Henderson } 58022fc3527SRichard Henderson clear_high(d, oprsz, desc); 58122fc3527SRichard Henderson } 58222fc3527SRichard Henderson 58322fc3527SRichard Henderson void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc) 58422fc3527SRichard Henderson { 58522fc3527SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 586*6c7ab301SRichard Henderson uint64_t vecb = (uint64_t)DUP2(b); 58722fc3527SRichard Henderson intptr_t i; 58822fc3527SRichard Henderson 589*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 590*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | vecb; 59122fc3527SRichard Henderson } 59222fc3527SRichard Henderson clear_high(d, oprsz, desc); 59322fc3527SRichard Henderson } 59422fc3527SRichard Henderson 595d0ec9796SRichard Henderson void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) 596d0ec9796SRichard Henderson { 597d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 598d0ec9796SRichard Henderson int shift = simd_data(desc); 599d0ec9796SRichard Henderson intptr_t i; 600d0ec9796SRichard Henderson 601*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 602*6c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift; 603d0ec9796SRichard Henderson } 604d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 605d0ec9796SRichard Henderson } 606d0ec9796SRichard Henderson 607d0ec9796SRichard Henderson void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) 608d0ec9796SRichard Henderson { 609d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 610d0ec9796SRichard Henderson int shift = simd_data(desc); 611d0ec9796SRichard Henderson intptr_t i; 612d0ec9796SRichard Henderson 613*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 614*6c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift; 615d0ec9796SRichard Henderson } 616d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 617d0ec9796SRichard Henderson } 618d0ec9796SRichard Henderson 619d0ec9796SRichard Henderson void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) 620d0ec9796SRichard Henderson { 621d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 622d0ec9796SRichard Henderson int shift = simd_data(desc); 623d0ec9796SRichard Henderson intptr_t i; 624d0ec9796SRichard Henderson 625*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 626*6c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift; 627d0ec9796SRichard Henderson } 628d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 629d0ec9796SRichard Henderson } 630d0ec9796SRichard Henderson 631d0ec9796SRichard Henderson void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) 632d0ec9796SRichard Henderson { 633d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 634d0ec9796SRichard Henderson int shift = simd_data(desc); 635d0ec9796SRichard Henderson intptr_t i; 636d0ec9796SRichard Henderson 637*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 638*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift; 639d0ec9796SRichard Henderson } 640d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 641d0ec9796SRichard Henderson } 642d0ec9796SRichard Henderson 643d0ec9796SRichard Henderson void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc) 644d0ec9796SRichard Henderson { 645d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 646d0ec9796SRichard Henderson int shift = simd_data(desc); 647d0ec9796SRichard Henderson intptr_t i; 648d0ec9796SRichard Henderson 649*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 650*6c7ab301SRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift; 651d0ec9796SRichard Henderson } 652d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 653d0ec9796SRichard Henderson } 654d0ec9796SRichard Henderson 655d0ec9796SRichard Henderson void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) 656d0ec9796SRichard Henderson { 657d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 658d0ec9796SRichard Henderson int shift = simd_data(desc); 659d0ec9796SRichard Henderson intptr_t i; 660d0ec9796SRichard Henderson 661*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 662*6c7ab301SRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift; 663d0ec9796SRichard Henderson } 664d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 665d0ec9796SRichard Henderson } 666d0ec9796SRichard Henderson 667d0ec9796SRichard Henderson void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) 668d0ec9796SRichard Henderson { 669d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 670d0ec9796SRichard Henderson int shift = simd_data(desc); 671d0ec9796SRichard Henderson intptr_t i; 672d0ec9796SRichard Henderson 673*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 674*6c7ab301SRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift; 675d0ec9796SRichard Henderson } 676d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 677d0ec9796SRichard Henderson } 678d0ec9796SRichard Henderson 679d0ec9796SRichard Henderson void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) 680d0ec9796SRichard Henderson { 681d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 682d0ec9796SRichard Henderson int shift = simd_data(desc); 683d0ec9796SRichard Henderson intptr_t i; 684d0ec9796SRichard Henderson 685*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 686*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift; 687d0ec9796SRichard Henderson } 688d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 689d0ec9796SRichard Henderson } 690d0ec9796SRichard Henderson 691d0ec9796SRichard Henderson void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) 692d0ec9796SRichard Henderson { 693d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 694d0ec9796SRichard Henderson int shift = simd_data(desc); 695d0ec9796SRichard Henderson intptr_t i; 696d0ec9796SRichard Henderson 697*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 698*6c7ab301SRichard Henderson *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift; 699d0ec9796SRichard Henderson } 700d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 701d0ec9796SRichard Henderson } 702d0ec9796SRichard Henderson 703d0ec9796SRichard Henderson void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) 704d0ec9796SRichard Henderson { 705d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 706d0ec9796SRichard Henderson int shift = simd_data(desc); 707d0ec9796SRichard Henderson intptr_t i; 708d0ec9796SRichard Henderson 709*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 710*6c7ab301SRichard Henderson *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift; 711d0ec9796SRichard Henderson } 712d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 713d0ec9796SRichard Henderson } 714d0ec9796SRichard Henderson 715d0ec9796SRichard Henderson void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) 716d0ec9796SRichard Henderson { 717d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 718d0ec9796SRichard Henderson int shift = simd_data(desc); 719d0ec9796SRichard Henderson intptr_t i; 720d0ec9796SRichard Henderson 721*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 722*6c7ab301SRichard Henderson *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift; 723d0ec9796SRichard Henderson } 724d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 725d0ec9796SRichard Henderson } 726d0ec9796SRichard Henderson 727d0ec9796SRichard Henderson void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) 728d0ec9796SRichard Henderson { 729d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 730d0ec9796SRichard Henderson int shift = simd_data(desc); 731d0ec9796SRichard Henderson intptr_t i; 732d0ec9796SRichard Henderson 733*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 734*6c7ab301SRichard Henderson *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift; 735d0ec9796SRichard Henderson } 736d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 737d0ec9796SRichard Henderson } 738212be173SRichard Henderson 7395ee5c14cSRichard Henderson void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) 7405ee5c14cSRichard Henderson { 7415ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 7425ee5c14cSRichard Henderson intptr_t i; 7435ee5c14cSRichard Henderson 7445ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 7455ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 7465ee5c14cSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh; 7475ee5c14cSRichard Henderson } 7485ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 7495ee5c14cSRichard Henderson } 7505ee5c14cSRichard Henderson 7515ee5c14cSRichard Henderson void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc) 7525ee5c14cSRichard Henderson { 7535ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 7545ee5c14cSRichard Henderson intptr_t i; 7555ee5c14cSRichard Henderson 7565ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 7575ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 7585ee5c14cSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh; 7595ee5c14cSRichard Henderson } 7605ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 7615ee5c14cSRichard Henderson } 7625ee5c14cSRichard Henderson 7635ee5c14cSRichard Henderson void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc) 7645ee5c14cSRichard Henderson { 7655ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 7665ee5c14cSRichard Henderson intptr_t i; 7675ee5c14cSRichard Henderson 7685ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 7695ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 7705ee5c14cSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh; 7715ee5c14cSRichard Henderson } 7725ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 7735ee5c14cSRichard Henderson } 7745ee5c14cSRichard Henderson 7755ee5c14cSRichard Henderson void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc) 7765ee5c14cSRichard Henderson { 7775ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 7785ee5c14cSRichard Henderson intptr_t i; 7795ee5c14cSRichard Henderson 7805ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 7815ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 7825ee5c14cSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh; 7835ee5c14cSRichard Henderson } 7845ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 7855ee5c14cSRichard Henderson } 7865ee5c14cSRichard Henderson 7875ee5c14cSRichard Henderson void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc) 7885ee5c14cSRichard Henderson { 7895ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 7905ee5c14cSRichard Henderson intptr_t i; 7915ee5c14cSRichard Henderson 7925ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 7935ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 7945ee5c14cSRichard Henderson *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh; 7955ee5c14cSRichard Henderson } 7965ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 7975ee5c14cSRichard Henderson } 7985ee5c14cSRichard Henderson 7995ee5c14cSRichard Henderson void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc) 8005ee5c14cSRichard Henderson { 8015ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8025ee5c14cSRichard Henderson intptr_t i; 8035ee5c14cSRichard Henderson 8045ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 8055ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 8065ee5c14cSRichard Henderson *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh; 8075ee5c14cSRichard Henderson } 8085ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8095ee5c14cSRichard Henderson } 8105ee5c14cSRichard Henderson 8115ee5c14cSRichard Henderson void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc) 8125ee5c14cSRichard Henderson { 8135ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8145ee5c14cSRichard Henderson intptr_t i; 8155ee5c14cSRichard Henderson 8165ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 8175ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 8185ee5c14cSRichard Henderson *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh; 8195ee5c14cSRichard Henderson } 8205ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8215ee5c14cSRichard Henderson } 8225ee5c14cSRichard Henderson 8235ee5c14cSRichard Henderson void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc) 8245ee5c14cSRichard Henderson { 8255ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8265ee5c14cSRichard Henderson intptr_t i; 8275ee5c14cSRichard Henderson 8285ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 8295ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 8305ee5c14cSRichard Henderson *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh; 8315ee5c14cSRichard Henderson } 8325ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8335ee5c14cSRichard Henderson } 8345ee5c14cSRichard Henderson 8355ee5c14cSRichard Henderson void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc) 8365ee5c14cSRichard Henderson { 8375ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8385ee5c14cSRichard Henderson intptr_t i; 8395ee5c14cSRichard Henderson 840899f08adSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 8415ee5c14cSRichard Henderson uint8_t sh = *(uint8_t *)(b + i) & 7; 8425ee5c14cSRichard Henderson *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh; 8435ee5c14cSRichard Henderson } 8445ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8455ee5c14cSRichard Henderson } 8465ee5c14cSRichard Henderson 8475ee5c14cSRichard Henderson void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc) 8485ee5c14cSRichard Henderson { 8495ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8505ee5c14cSRichard Henderson intptr_t i; 8515ee5c14cSRichard Henderson 8525ee5c14cSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 8535ee5c14cSRichard Henderson uint8_t sh = *(uint16_t *)(b + i) & 15; 8545ee5c14cSRichard Henderson *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh; 8555ee5c14cSRichard Henderson } 8565ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8575ee5c14cSRichard Henderson } 8585ee5c14cSRichard Henderson 8595ee5c14cSRichard Henderson void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc) 8605ee5c14cSRichard Henderson { 8615ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8625ee5c14cSRichard Henderson intptr_t i; 8635ee5c14cSRichard Henderson 864899f08adSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 8655ee5c14cSRichard Henderson uint8_t sh = *(uint32_t *)(b + i) & 31; 8665ee5c14cSRichard Henderson *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh; 8675ee5c14cSRichard Henderson } 8685ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8695ee5c14cSRichard Henderson } 8705ee5c14cSRichard Henderson 8715ee5c14cSRichard Henderson void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) 8725ee5c14cSRichard Henderson { 8735ee5c14cSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 8745ee5c14cSRichard Henderson intptr_t i; 8755ee5c14cSRichard Henderson 876899f08adSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 8775ee5c14cSRichard Henderson uint8_t sh = *(uint64_t *)(b + i) & 63; 8785ee5c14cSRichard Henderson *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh; 8795ee5c14cSRichard Henderson } 8805ee5c14cSRichard Henderson clear_high(d, oprsz, desc); 8815ee5c14cSRichard Henderson } 8825ee5c14cSRichard Henderson 883212be173SRichard Henderson #define DO_CMP0(X) -(X) 884212be173SRichard Henderson 885212be173SRichard Henderson #define DO_CMP1(NAME, TYPE, OP) \ 886212be173SRichard Henderson void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ 887212be173SRichard Henderson { \ 888212be173SRichard Henderson intptr_t oprsz = simd_oprsz(desc); \ 889212be173SRichard Henderson intptr_t i; \ 8906cb1d3b8SRichard Henderson for (i = 0; i < oprsz; i += sizeof(TYPE)) { \ 891212be173SRichard Henderson *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \ 892212be173SRichard Henderson } \ 893212be173SRichard Henderson clear_high(d, oprsz, desc); \ 894212be173SRichard Henderson } 895212be173SRichard Henderson 896212be173SRichard Henderson #define DO_CMP2(SZ) \ 897*6c7ab301SRichard Henderson DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \ 898*6c7ab301SRichard Henderson DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \ 899*6c7ab301SRichard Henderson DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \ 900*6c7ab301SRichard Henderson DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \ 901*6c7ab301SRichard Henderson DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \ 902*6c7ab301SRichard Henderson DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=) 903212be173SRichard Henderson 904212be173SRichard Henderson DO_CMP2(8) 905212be173SRichard Henderson DO_CMP2(16) 906212be173SRichard Henderson DO_CMP2(32) 907212be173SRichard Henderson DO_CMP2(64) 908212be173SRichard Henderson 909212be173SRichard Henderson #undef DO_CMP0 910212be173SRichard Henderson #undef DO_CMP1 911212be173SRichard Henderson #undef DO_CMP2 912f49b12c6SRichard Henderson 913f49b12c6SRichard Henderson void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc) 914f49b12c6SRichard Henderson { 915f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 916f49b12c6SRichard Henderson intptr_t i; 917f49b12c6SRichard Henderson 918f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 919f49b12c6SRichard Henderson int r = *(int8_t *)(a + i) + *(int8_t *)(b + i); 920f49b12c6SRichard Henderson if (r > INT8_MAX) { 921f49b12c6SRichard Henderson r = INT8_MAX; 922f49b12c6SRichard Henderson } else if (r < INT8_MIN) { 923f49b12c6SRichard Henderson r = INT8_MIN; 924f49b12c6SRichard Henderson } 925f49b12c6SRichard Henderson *(int8_t *)(d + i) = r; 926f49b12c6SRichard Henderson } 927f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 928f49b12c6SRichard Henderson } 929f49b12c6SRichard Henderson 930f49b12c6SRichard Henderson void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc) 931f49b12c6SRichard Henderson { 932f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 933f49b12c6SRichard Henderson intptr_t i; 934f49b12c6SRichard Henderson 935f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 936f49b12c6SRichard Henderson int r = *(int16_t *)(a + i) + *(int16_t *)(b + i); 937f49b12c6SRichard Henderson if (r > INT16_MAX) { 938f49b12c6SRichard Henderson r = INT16_MAX; 939f49b12c6SRichard Henderson } else if (r < INT16_MIN) { 940f49b12c6SRichard Henderson r = INT16_MIN; 941f49b12c6SRichard Henderson } 942f49b12c6SRichard Henderson *(int16_t *)(d + i) = r; 943f49b12c6SRichard Henderson } 944f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 945f49b12c6SRichard Henderson } 946f49b12c6SRichard Henderson 947f49b12c6SRichard Henderson void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc) 948f49b12c6SRichard Henderson { 949f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 950f49b12c6SRichard Henderson intptr_t i; 951f49b12c6SRichard Henderson 952f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 953f49b12c6SRichard Henderson int32_t ai = *(int32_t *)(a + i); 954f49b12c6SRichard Henderson int32_t bi = *(int32_t *)(b + i); 955f49b12c6SRichard Henderson int32_t di = ai + bi; 956f49b12c6SRichard Henderson if (((di ^ ai) &~ (ai ^ bi)) < 0) { 957f49b12c6SRichard Henderson /* Signed overflow. */ 958f49b12c6SRichard Henderson di = (di < 0 ? INT32_MAX : INT32_MIN); 959f49b12c6SRichard Henderson } 960f49b12c6SRichard Henderson *(int32_t *)(d + i) = di; 961f49b12c6SRichard Henderson } 962f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 963f49b12c6SRichard Henderson } 964f49b12c6SRichard Henderson 965f49b12c6SRichard Henderson void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc) 966f49b12c6SRichard Henderson { 967f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 968f49b12c6SRichard Henderson intptr_t i; 969f49b12c6SRichard Henderson 970f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 971f49b12c6SRichard Henderson int64_t ai = *(int64_t *)(a + i); 972f49b12c6SRichard Henderson int64_t bi = *(int64_t *)(b + i); 973f49b12c6SRichard Henderson int64_t di = ai + bi; 974f49b12c6SRichard Henderson if (((di ^ ai) &~ (ai ^ bi)) < 0) { 975f49b12c6SRichard Henderson /* Signed overflow. */ 976f49b12c6SRichard Henderson di = (di < 0 ? INT64_MAX : INT64_MIN); 977f49b12c6SRichard Henderson } 978f49b12c6SRichard Henderson *(int64_t *)(d + i) = di; 979f49b12c6SRichard Henderson } 980f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 981f49b12c6SRichard Henderson } 982f49b12c6SRichard Henderson 983f49b12c6SRichard Henderson void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc) 984f49b12c6SRichard Henderson { 985f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 986f49b12c6SRichard Henderson intptr_t i; 987f49b12c6SRichard Henderson 988f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 989f49b12c6SRichard Henderson int r = *(int8_t *)(a + i) - *(int8_t *)(b + i); 990f49b12c6SRichard Henderson if (r > INT8_MAX) { 991f49b12c6SRichard Henderson r = INT8_MAX; 992f49b12c6SRichard Henderson } else if (r < INT8_MIN) { 993f49b12c6SRichard Henderson r = INT8_MIN; 994f49b12c6SRichard Henderson } 995f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 996f49b12c6SRichard Henderson } 997f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 998f49b12c6SRichard Henderson } 999f49b12c6SRichard Henderson 1000f49b12c6SRichard Henderson void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc) 1001f49b12c6SRichard Henderson { 1002f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1003f49b12c6SRichard Henderson intptr_t i; 1004f49b12c6SRichard Henderson 1005f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1006f49b12c6SRichard Henderson int r = *(int16_t *)(a + i) - *(int16_t *)(b + i); 1007f49b12c6SRichard Henderson if (r > INT16_MAX) { 1008f49b12c6SRichard Henderson r = INT16_MAX; 1009f49b12c6SRichard Henderson } else if (r < INT16_MIN) { 1010f49b12c6SRichard Henderson r = INT16_MIN; 1011f49b12c6SRichard Henderson } 1012f49b12c6SRichard Henderson *(int16_t *)(d + i) = r; 1013f49b12c6SRichard Henderson } 1014f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1015f49b12c6SRichard Henderson } 1016f49b12c6SRichard Henderson 1017f49b12c6SRichard Henderson void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc) 1018f49b12c6SRichard Henderson { 1019f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1020f49b12c6SRichard Henderson intptr_t i; 1021f49b12c6SRichard Henderson 1022f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1023f49b12c6SRichard Henderson int32_t ai = *(int32_t *)(a + i); 1024f49b12c6SRichard Henderson int32_t bi = *(int32_t *)(b + i); 1025f49b12c6SRichard Henderson int32_t di = ai - bi; 1026f49b12c6SRichard Henderson if (((di ^ ai) & (ai ^ bi)) < 0) { 1027f49b12c6SRichard Henderson /* Signed overflow. */ 1028f49b12c6SRichard Henderson di = (di < 0 ? INT32_MAX : INT32_MIN); 1029f49b12c6SRichard Henderson } 1030f49b12c6SRichard Henderson *(int32_t *)(d + i) = di; 1031f49b12c6SRichard Henderson } 1032f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1033f49b12c6SRichard Henderson } 1034f49b12c6SRichard Henderson 1035f49b12c6SRichard Henderson void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc) 1036f49b12c6SRichard Henderson { 1037f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1038f49b12c6SRichard Henderson intptr_t i; 1039f49b12c6SRichard Henderson 1040f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1041f49b12c6SRichard Henderson int64_t ai = *(int64_t *)(a + i); 1042f49b12c6SRichard Henderson int64_t bi = *(int64_t *)(b + i); 1043f49b12c6SRichard Henderson int64_t di = ai - bi; 1044f49b12c6SRichard Henderson if (((di ^ ai) & (ai ^ bi)) < 0) { 1045f49b12c6SRichard Henderson /* Signed overflow. */ 1046f49b12c6SRichard Henderson di = (di < 0 ? INT64_MAX : INT64_MIN); 1047f49b12c6SRichard Henderson } 1048f49b12c6SRichard Henderson *(int64_t *)(d + i) = di; 1049f49b12c6SRichard Henderson } 1050f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1051f49b12c6SRichard Henderson } 1052f49b12c6SRichard Henderson 1053f49b12c6SRichard Henderson void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc) 1054f49b12c6SRichard Henderson { 1055f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1056f49b12c6SRichard Henderson intptr_t i; 1057f49b12c6SRichard Henderson 1058f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1059f49b12c6SRichard Henderson unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 1060f49b12c6SRichard Henderson if (r > UINT8_MAX) { 1061f49b12c6SRichard Henderson r = UINT8_MAX; 1062f49b12c6SRichard Henderson } 1063f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 1064f49b12c6SRichard Henderson } 1065f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1066f49b12c6SRichard Henderson } 1067f49b12c6SRichard Henderson 1068f49b12c6SRichard Henderson void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc) 1069f49b12c6SRichard Henderson { 1070f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1071f49b12c6SRichard Henderson intptr_t i; 1072f49b12c6SRichard Henderson 1073f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1074f49b12c6SRichard Henderson unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 1075f49b12c6SRichard Henderson if (r > UINT16_MAX) { 1076f49b12c6SRichard Henderson r = UINT16_MAX; 1077f49b12c6SRichard Henderson } 1078f49b12c6SRichard Henderson *(uint16_t *)(d + i) = r; 1079f49b12c6SRichard Henderson } 1080f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1081f49b12c6SRichard Henderson } 1082f49b12c6SRichard Henderson 1083f49b12c6SRichard Henderson void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc) 1084f49b12c6SRichard Henderson { 1085f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1086f49b12c6SRichard Henderson intptr_t i; 1087f49b12c6SRichard Henderson 1088f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1089f49b12c6SRichard Henderson uint32_t ai = *(uint32_t *)(a + i); 1090f49b12c6SRichard Henderson uint32_t bi = *(uint32_t *)(b + i); 1091f49b12c6SRichard Henderson uint32_t di = ai + bi; 1092f49b12c6SRichard Henderson if (di < ai) { 1093f49b12c6SRichard Henderson di = UINT32_MAX; 1094f49b12c6SRichard Henderson } 1095f49b12c6SRichard Henderson *(uint32_t *)(d + i) = di; 1096f49b12c6SRichard Henderson } 1097f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1098f49b12c6SRichard Henderson } 1099f49b12c6SRichard Henderson 1100f49b12c6SRichard Henderson void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc) 1101f49b12c6SRichard Henderson { 1102f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1103f49b12c6SRichard Henderson intptr_t i; 1104f49b12c6SRichard Henderson 1105f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1106f49b12c6SRichard Henderson uint64_t ai = *(uint64_t *)(a + i); 1107f49b12c6SRichard Henderson uint64_t bi = *(uint64_t *)(b + i); 1108f49b12c6SRichard Henderson uint64_t di = ai + bi; 1109f49b12c6SRichard Henderson if (di < ai) { 1110f49b12c6SRichard Henderson di = UINT64_MAX; 1111f49b12c6SRichard Henderson } 1112f49b12c6SRichard Henderson *(uint64_t *)(d + i) = di; 1113f49b12c6SRichard Henderson } 1114f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1115f49b12c6SRichard Henderson } 1116f49b12c6SRichard Henderson 1117f49b12c6SRichard Henderson void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc) 1118f49b12c6SRichard Henderson { 1119f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1120f49b12c6SRichard Henderson intptr_t i; 1121f49b12c6SRichard Henderson 1122f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1123f49b12c6SRichard Henderson int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 1124f49b12c6SRichard Henderson if (r < 0) { 1125f49b12c6SRichard Henderson r = 0; 1126f49b12c6SRichard Henderson } 1127f49b12c6SRichard Henderson *(uint8_t *)(d + i) = r; 1128f49b12c6SRichard Henderson } 1129f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1130f49b12c6SRichard Henderson } 1131f49b12c6SRichard Henderson 1132f49b12c6SRichard Henderson void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc) 1133f49b12c6SRichard Henderson { 1134f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1135f49b12c6SRichard Henderson intptr_t i; 1136f49b12c6SRichard Henderson 1137f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1138f49b12c6SRichard Henderson int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 1139f49b12c6SRichard Henderson if (r < 0) { 1140f49b12c6SRichard Henderson r = 0; 1141f49b12c6SRichard Henderson } 1142f49b12c6SRichard Henderson *(uint16_t *)(d + i) = r; 1143f49b12c6SRichard Henderson } 1144f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1145f49b12c6SRichard Henderson } 1146f49b12c6SRichard Henderson 1147f49b12c6SRichard Henderson void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc) 1148f49b12c6SRichard Henderson { 1149f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1150f49b12c6SRichard Henderson intptr_t i; 1151f49b12c6SRichard Henderson 1152f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1153f49b12c6SRichard Henderson uint32_t ai = *(uint32_t *)(a + i); 1154f49b12c6SRichard Henderson uint32_t bi = *(uint32_t *)(b + i); 1155f49b12c6SRichard Henderson uint32_t di = ai - bi; 1156f49b12c6SRichard Henderson if (ai < bi) { 1157f49b12c6SRichard Henderson di = 0; 1158f49b12c6SRichard Henderson } 1159f49b12c6SRichard Henderson *(uint32_t *)(d + i) = di; 1160f49b12c6SRichard Henderson } 1161f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1162f49b12c6SRichard Henderson } 1163f49b12c6SRichard Henderson 1164f49b12c6SRichard Henderson void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc) 1165f49b12c6SRichard Henderson { 1166f49b12c6SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1167f49b12c6SRichard Henderson intptr_t i; 1168f49b12c6SRichard Henderson 1169f49b12c6SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1170f49b12c6SRichard Henderson uint64_t ai = *(uint64_t *)(a + i); 1171f49b12c6SRichard Henderson uint64_t bi = *(uint64_t *)(b + i); 1172f49b12c6SRichard Henderson uint64_t di = ai - bi; 1173f49b12c6SRichard Henderson if (ai < bi) { 1174f49b12c6SRichard Henderson di = 0; 1175f49b12c6SRichard Henderson } 1176f49b12c6SRichard Henderson *(uint64_t *)(d + i) = di; 1177f49b12c6SRichard Henderson } 1178f49b12c6SRichard Henderson clear_high(d, oprsz, desc); 1179f49b12c6SRichard Henderson } 1180dd0a0fcdSRichard Henderson 1181dd0a0fcdSRichard Henderson void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc) 1182dd0a0fcdSRichard Henderson { 1183dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1184dd0a0fcdSRichard Henderson intptr_t i; 1185dd0a0fcdSRichard Henderson 1186dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1187dd0a0fcdSRichard Henderson int8_t aa = *(int8_t *)(a + i); 1188dd0a0fcdSRichard Henderson int8_t bb = *(int8_t *)(b + i); 1189dd0a0fcdSRichard Henderson int8_t dd = aa < bb ? aa : bb; 1190dd0a0fcdSRichard Henderson *(int8_t *)(d + i) = dd; 1191dd0a0fcdSRichard Henderson } 1192dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1193dd0a0fcdSRichard Henderson } 1194dd0a0fcdSRichard Henderson 1195dd0a0fcdSRichard Henderson void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc) 1196dd0a0fcdSRichard Henderson { 1197dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1198dd0a0fcdSRichard Henderson intptr_t i; 1199dd0a0fcdSRichard Henderson 1200dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1201dd0a0fcdSRichard Henderson int16_t aa = *(int16_t *)(a + i); 1202dd0a0fcdSRichard Henderson int16_t bb = *(int16_t *)(b + i); 1203dd0a0fcdSRichard Henderson int16_t dd = aa < bb ? aa : bb; 1204dd0a0fcdSRichard Henderson *(int16_t *)(d + i) = dd; 1205dd0a0fcdSRichard Henderson } 1206dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1207dd0a0fcdSRichard Henderson } 1208dd0a0fcdSRichard Henderson 1209dd0a0fcdSRichard Henderson void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc) 1210dd0a0fcdSRichard Henderson { 1211dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1212dd0a0fcdSRichard Henderson intptr_t i; 1213dd0a0fcdSRichard Henderson 1214dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1215dd0a0fcdSRichard Henderson int32_t aa = *(int32_t *)(a + i); 1216dd0a0fcdSRichard Henderson int32_t bb = *(int32_t *)(b + i); 1217dd0a0fcdSRichard Henderson int32_t dd = aa < bb ? aa : bb; 1218dd0a0fcdSRichard Henderson *(int32_t *)(d + i) = dd; 1219dd0a0fcdSRichard Henderson } 1220dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1221dd0a0fcdSRichard Henderson } 1222dd0a0fcdSRichard Henderson 1223dd0a0fcdSRichard Henderson void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc) 1224dd0a0fcdSRichard Henderson { 1225dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1226dd0a0fcdSRichard Henderson intptr_t i; 1227dd0a0fcdSRichard Henderson 1228dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1229dd0a0fcdSRichard Henderson int64_t aa = *(int64_t *)(a + i); 1230dd0a0fcdSRichard Henderson int64_t bb = *(int64_t *)(b + i); 1231dd0a0fcdSRichard Henderson int64_t dd = aa < bb ? aa : bb; 1232dd0a0fcdSRichard Henderson *(int64_t *)(d + i) = dd; 1233dd0a0fcdSRichard Henderson } 1234dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1235dd0a0fcdSRichard Henderson } 1236dd0a0fcdSRichard Henderson 1237dd0a0fcdSRichard Henderson void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc) 1238dd0a0fcdSRichard Henderson { 1239dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1240dd0a0fcdSRichard Henderson intptr_t i; 1241dd0a0fcdSRichard Henderson 1242dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1243dd0a0fcdSRichard Henderson int8_t aa = *(int8_t *)(a + i); 1244dd0a0fcdSRichard Henderson int8_t bb = *(int8_t *)(b + i); 1245dd0a0fcdSRichard Henderson int8_t dd = aa > bb ? aa : bb; 1246dd0a0fcdSRichard Henderson *(int8_t *)(d + i) = dd; 1247dd0a0fcdSRichard Henderson } 1248dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1249dd0a0fcdSRichard Henderson } 1250dd0a0fcdSRichard Henderson 1251dd0a0fcdSRichard Henderson void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc) 1252dd0a0fcdSRichard Henderson { 1253dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1254dd0a0fcdSRichard Henderson intptr_t i; 1255dd0a0fcdSRichard Henderson 1256dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1257dd0a0fcdSRichard Henderson int16_t aa = *(int16_t *)(a + i); 1258dd0a0fcdSRichard Henderson int16_t bb = *(int16_t *)(b + i); 1259dd0a0fcdSRichard Henderson int16_t dd = aa > bb ? aa : bb; 1260dd0a0fcdSRichard Henderson *(int16_t *)(d + i) = dd; 1261dd0a0fcdSRichard Henderson } 1262dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1263dd0a0fcdSRichard Henderson } 1264dd0a0fcdSRichard Henderson 1265dd0a0fcdSRichard Henderson void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc) 1266dd0a0fcdSRichard Henderson { 1267dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1268dd0a0fcdSRichard Henderson intptr_t i; 1269dd0a0fcdSRichard Henderson 1270dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1271dd0a0fcdSRichard Henderson int32_t aa = *(int32_t *)(a + i); 1272dd0a0fcdSRichard Henderson int32_t bb = *(int32_t *)(b + i); 1273dd0a0fcdSRichard Henderson int32_t dd = aa > bb ? aa : bb; 1274dd0a0fcdSRichard Henderson *(int32_t *)(d + i) = dd; 1275dd0a0fcdSRichard Henderson } 1276dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1277dd0a0fcdSRichard Henderson } 1278dd0a0fcdSRichard Henderson 1279dd0a0fcdSRichard Henderson void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc) 1280dd0a0fcdSRichard Henderson { 1281dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1282dd0a0fcdSRichard Henderson intptr_t i; 1283dd0a0fcdSRichard Henderson 1284dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1285dd0a0fcdSRichard Henderson int64_t aa = *(int64_t *)(a + i); 1286dd0a0fcdSRichard Henderson int64_t bb = *(int64_t *)(b + i); 1287dd0a0fcdSRichard Henderson int64_t dd = aa > bb ? aa : bb; 1288dd0a0fcdSRichard Henderson *(int64_t *)(d + i) = dd; 1289dd0a0fcdSRichard Henderson } 1290dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1291dd0a0fcdSRichard Henderson } 1292dd0a0fcdSRichard Henderson 1293dd0a0fcdSRichard Henderson void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc) 1294dd0a0fcdSRichard Henderson { 1295dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1296dd0a0fcdSRichard Henderson intptr_t i; 1297dd0a0fcdSRichard Henderson 1298dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1299dd0a0fcdSRichard Henderson uint8_t aa = *(uint8_t *)(a + i); 1300dd0a0fcdSRichard Henderson uint8_t bb = *(uint8_t *)(b + i); 1301dd0a0fcdSRichard Henderson uint8_t dd = aa < bb ? aa : bb; 1302dd0a0fcdSRichard Henderson *(uint8_t *)(d + i) = dd; 1303dd0a0fcdSRichard Henderson } 1304dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1305dd0a0fcdSRichard Henderson } 1306dd0a0fcdSRichard Henderson 1307dd0a0fcdSRichard Henderson void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc) 1308dd0a0fcdSRichard Henderson { 1309dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1310dd0a0fcdSRichard Henderson intptr_t i; 1311dd0a0fcdSRichard Henderson 1312dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1313dd0a0fcdSRichard Henderson uint16_t aa = *(uint16_t *)(a + i); 1314dd0a0fcdSRichard Henderson uint16_t bb = *(uint16_t *)(b + i); 1315dd0a0fcdSRichard Henderson uint16_t dd = aa < bb ? aa : bb; 1316dd0a0fcdSRichard Henderson *(uint16_t *)(d + i) = dd; 1317dd0a0fcdSRichard Henderson } 1318dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1319dd0a0fcdSRichard Henderson } 1320dd0a0fcdSRichard Henderson 1321dd0a0fcdSRichard Henderson void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc) 1322dd0a0fcdSRichard Henderson { 1323dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1324dd0a0fcdSRichard Henderson intptr_t i; 1325dd0a0fcdSRichard Henderson 1326dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1327dd0a0fcdSRichard Henderson uint32_t aa = *(uint32_t *)(a + i); 1328dd0a0fcdSRichard Henderson uint32_t bb = *(uint32_t *)(b + i); 1329dd0a0fcdSRichard Henderson uint32_t dd = aa < bb ? aa : bb; 1330dd0a0fcdSRichard Henderson *(uint32_t *)(d + i) = dd; 1331dd0a0fcdSRichard Henderson } 1332dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1333dd0a0fcdSRichard Henderson } 1334dd0a0fcdSRichard Henderson 1335dd0a0fcdSRichard Henderson void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc) 1336dd0a0fcdSRichard Henderson { 1337dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1338dd0a0fcdSRichard Henderson intptr_t i; 1339dd0a0fcdSRichard Henderson 1340dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1341dd0a0fcdSRichard Henderson uint64_t aa = *(uint64_t *)(a + i); 1342dd0a0fcdSRichard Henderson uint64_t bb = *(uint64_t *)(b + i); 1343dd0a0fcdSRichard Henderson uint64_t dd = aa < bb ? aa : bb; 1344dd0a0fcdSRichard Henderson *(uint64_t *)(d + i) = dd; 1345dd0a0fcdSRichard Henderson } 1346dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1347dd0a0fcdSRichard Henderson } 1348dd0a0fcdSRichard Henderson 1349dd0a0fcdSRichard Henderson void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc) 1350dd0a0fcdSRichard Henderson { 1351dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1352dd0a0fcdSRichard Henderson intptr_t i; 1353dd0a0fcdSRichard Henderson 1354dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1355dd0a0fcdSRichard Henderson uint8_t aa = *(uint8_t *)(a + i); 1356dd0a0fcdSRichard Henderson uint8_t bb = *(uint8_t *)(b + i); 1357dd0a0fcdSRichard Henderson uint8_t dd = aa > bb ? aa : bb; 1358dd0a0fcdSRichard Henderson *(uint8_t *)(d + i) = dd; 1359dd0a0fcdSRichard Henderson } 1360dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1361dd0a0fcdSRichard Henderson } 1362dd0a0fcdSRichard Henderson 1363dd0a0fcdSRichard Henderson void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc) 1364dd0a0fcdSRichard Henderson { 1365dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1366dd0a0fcdSRichard Henderson intptr_t i; 1367dd0a0fcdSRichard Henderson 1368dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1369dd0a0fcdSRichard Henderson uint16_t aa = *(uint16_t *)(a + i); 1370dd0a0fcdSRichard Henderson uint16_t bb = *(uint16_t *)(b + i); 1371dd0a0fcdSRichard Henderson uint16_t dd = aa > bb ? aa : bb; 1372dd0a0fcdSRichard Henderson *(uint16_t *)(d + i) = dd; 1373dd0a0fcdSRichard Henderson } 1374dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1375dd0a0fcdSRichard Henderson } 1376dd0a0fcdSRichard Henderson 1377dd0a0fcdSRichard Henderson void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc) 1378dd0a0fcdSRichard Henderson { 1379dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1380dd0a0fcdSRichard Henderson intptr_t i; 1381dd0a0fcdSRichard Henderson 1382dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1383dd0a0fcdSRichard Henderson uint32_t aa = *(uint32_t *)(a + i); 1384dd0a0fcdSRichard Henderson uint32_t bb = *(uint32_t *)(b + i); 1385dd0a0fcdSRichard Henderson uint32_t dd = aa > bb ? aa : bb; 1386dd0a0fcdSRichard Henderson *(uint32_t *)(d + i) = dd; 1387dd0a0fcdSRichard Henderson } 1388dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1389dd0a0fcdSRichard Henderson } 1390dd0a0fcdSRichard Henderson 1391dd0a0fcdSRichard Henderson void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc) 1392dd0a0fcdSRichard Henderson { 1393dd0a0fcdSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 1394dd0a0fcdSRichard Henderson intptr_t i; 1395dd0a0fcdSRichard Henderson 1396dd0a0fcdSRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1397dd0a0fcdSRichard Henderson uint64_t aa = *(uint64_t *)(a + i); 1398dd0a0fcdSRichard Henderson uint64_t bb = *(uint64_t *)(b + i); 1399dd0a0fcdSRichard Henderson uint64_t dd = aa > bb ? aa : bb; 1400dd0a0fcdSRichard Henderson *(uint64_t *)(d + i) = dd; 1401dd0a0fcdSRichard Henderson } 1402dd0a0fcdSRichard Henderson clear_high(d, oprsz, desc); 1403dd0a0fcdSRichard Henderson } 140438dc1294SRichard Henderson 140538dc1294SRichard Henderson void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc) 140638dc1294SRichard Henderson { 140738dc1294SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 140838dc1294SRichard Henderson intptr_t i; 140938dc1294SRichard Henderson 1410*6c7ab301SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1411*6c7ab301SRichard Henderson uint64_t aa = *(uint64_t *)(a + i); 1412*6c7ab301SRichard Henderson uint64_t bb = *(uint64_t *)(b + i); 1413*6c7ab301SRichard Henderson uint64_t cc = *(uint64_t *)(c + i); 1414*6c7ab301SRichard Henderson *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa); 141538dc1294SRichard Henderson } 141638dc1294SRichard Henderson clear_high(d, oprsz, desc); 141738dc1294SRichard Henderson } 1418